/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"

#include <algorithm>
#include <cinttypes>
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <iterator>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "tensorflow/lite/c/c_api_types.h"
#include "tensorflow/lite/delegates/serialization.h"
#include "tensorflow/lite/logger.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#include "tensorflow/lite/nnapi/sl/public/NeuralNetworksSupportLibraryImpl.h"

#ifdef __ANDROID__
#include <sys/system_properties.h>
#endif

#if defined __ANDROID__ || defined __unix__
#define TFLITE_NNAPI_ALLOW_MMAP_SHARING
#include <sys/mman.h>
#include <unistd.h>
#endif

#include "fp16.h"  // from @FP16
#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/builtin_op_data.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/context_util.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h"
#include "tensorflow/lite/delegates/nnapi/quant_lstm_sup.h"
#include "tensorflow/lite/delegates/utils.h"
#include "tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"
#include "tensorflow/lite/nnapi/nnapi_util.h"
#include "tensorflow/lite/util.h"
#ifdef NNAPI_VERBOSE_VALIDATION
#include "tensorflow/lite/schema/schema_generated.h"
#endif
#include "utils/hash/farmhash.h"

namespace tflite {
namespace {

static const char kNnapiId[] = "nnapi_";
constexpr uint64_t kNoMemoryTimestamp = 0;

// Returns a string ID unique to the accelerator that NNAPI runs on, based on
// user params. Assumes that the default accelerator is the same across runs.
// Used for caching nodes to be delegated for a model.
std::string NnApiBackendId(
    const StatefulNnApiDelegate::Options& delegate_options) {
  std::string delegate_id = kNnapiId;
  if (delegate_options.accelerator_name) {
    delegate_id += delegate_options.accelerator_name;
  }
  return delegate_id;
}
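// For illustration (hypothetical accelerator name): with
// delegate_options.accelerator_name == "my-dsp" the returned ID would be
// "nnapi_my-dsp"; with no accelerator specified it is just "nnapi_".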

// Returns the enum name corresponding to the given error code if the given
// value corresponds to one of the error codes in the enumeration above, or
// a message with the unknown code otherwise.
// LINT.IfChange(NnApiErrorDescription)
std::string NnApiErrorDescription(int error_code) {
  switch (error_code) {
    case ANEURALNETWORKS_NO_ERROR:
      return "ANEURALNETWORKS_NO_ERROR";
    case ANEURALNETWORKS_OUT_OF_MEMORY:
      return "ANEURALNETWORKS_OUT_OF_MEMORY";
    case ANEURALNETWORKS_INCOMPLETE:
      return "ANEURALNETWORKS_INCOMPLETE";
    case ANEURALNETWORKS_UNEXPECTED_NULL:
      return "ANEURALNETWORKS_UNEXPECTED_NULL";
    case ANEURALNETWORKS_BAD_DATA:
      return "ANEURALNETWORKS_BAD_DATA";
    case ANEURALNETWORKS_OP_FAILED:
      return "ANEURALNETWORKS_OP_FAILED";
    case ANEURALNETWORKS_BAD_STATE:
      return "ANEURALNETWORKS_BAD_STATE";
    case ANEURALNETWORKS_UNMAPPABLE:
      return "ANEURALNETWORKS_UNMAPPABLE";
    case ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE:
      return "ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE";
    case ANEURALNETWORKS_UNAVAILABLE_DEVICE:
      return "ANEURALNETWORKS_UNAVAILABLE_DEVICE";
    case ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT:
      return "ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT";
    case ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT:
      return "ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT";
    case ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT:
      return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT";
    case ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT:
      return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT";
    case ANEURALNETWORKS_DEAD_OBJECT:
      return "ANEURALNETWORKS_DEAD_OBJECT";
    default:
      return "Unknown NNAPI error code: " + std::to_string(error_code);
  }
}
// LINT.ThenChange()

#define RETURN_TFLITE_ERROR_IF_NN_ERROR(context, code, call_desc, p_errno)  \
  do {                                                                      \
    const auto _code = (code);                                              \
    const auto _call_desc = (call_desc);                                    \
    if (_code != ANEURALNETWORKS_NO_ERROR) {                                \
      const auto error_desc = NnApiErrorDescription(_code);                 \
      TF_LITE_KERNEL_LOG(context,                                           \
                         "NN API returned error %s at line %d while %s.\n", \
                         error_desc.c_str(), __LINE__, _call_desc);         \
      *p_errno = _code;                                                     \
      return kTfLiteError;                                                  \
    }                                                                       \
  } while (0)

#define RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(context, code, call_desc, \
                                                   p_tensor, p_errno)        \
  do {                                                                       \
    const auto _code = (code);                                               \
    const auto _call_desc = (call_desc);                                     \
    if (_code != ANEURALNETWORKS_NO_ERROR) {                                 \
      const auto error_desc = NnApiErrorDescription(_code);                  \
      TF_LITE_KERNEL_LOG(context,                                            \
                         "NN API returned error %s at line %d while %s "     \
                         "for tensor '%s'.\n",                               \
                         error_desc.c_str(), __LINE__, _call_desc,           \
                         (p_tensor)->name ? (p_tensor)->name : "no-name");   \
      *p_errno = _code;                                                      \
      return kTfLiteError;                                                   \
    }                                                                        \
  } while (0)
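
// Illustrative usage, mirroring the call sites later in this file: wrap an
// NNAPI call so that any non-ANEURALNETWORKS_NO_ERROR result is logged,
// stored in *p_errno, and propagated as kTfLiteError, e.g.
//   RETURN_TFLITE_ERROR_IF_NN_ERROR(
//       context, nnapi->ANeuralNetworks_getDevice(i, &device),
//       "Searching for target device", nnapi_errno);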

bool IsFloat(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
      return true;
    default:
      return false;
  }
}

bool IsFloatOrUInt8(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8:
      return true;
    default:
      return false;
  }
}

bool IsQuantized(TfLiteType type) {
  switch (type) {
    case kTfLiteUInt8:
    case kTfLiteInt8:
      return true;
    default:
      // kTfLiteInt16 isn't supported as quantized type yet.
      return false;
  }
}

bool IsInt32(TfLiteType type) {
  switch (type) {
    case kTfLiteInt32:
      return true;
    default:
      return false;
  }
}

bool IsFloatOrQuantized(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8:
    case kTfLiteInt8:
      return true;
    default:
      return false;
  }
}

bool IsFloatOrInt32(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
    case kTfLiteInt32:
      return true;
    default:
      return false;
  }
}

bool IsFloatQuantizedOrInt32(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
    case kTfLiteUInt8:
    case kTfLiteInt8:
    case kTfLiteInt32:
      return true;
    default:
      return false;
  }
}

bool IsScalarInputSupported(int builtin_code) {
  switch (builtin_code) {
    case kTfLiteBuiltinAdd:
    case kTfLiteBuiltinMul:
    case kTfLiteBuiltinSub:
    case kTfLiteBuiltinDiv:
    case kTfLiteBuiltinEqual:
    case kTfLiteBuiltinNotEqual:
    case kTfLiteBuiltinGreater:
    case kTfLiteBuiltinGreaterEqual:
    case kTfLiteBuiltinLess:
    case kTfLiteBuiltinLessEqual:
    case kTfLiteBuiltinPow:
    case kTfLiteBuiltinMaximum:
    case kTfLiteBuiltinMinimum:
    case kTfLiteBuiltinPrelu:
    case kTfLiteBuiltinLeakyRelu:
      return true;
    default:
      return false;
  }
}

// Check if the operation requires explicit conversion from int8 to uint8
// values.
bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
                        const TfLiteNode* node) {
  const int input_id = node->inputs->data[0];
  const TfLiteType input_type = context->tensors[input_id].type;
  switch (builtin_code) {
    case kTfLiteBuiltinConv2d:
    case kTfLiteBuiltinDepthwiseConv2d:
    case kTfLiteBuiltinFullyConnected: {
      if (input_type == kTfLiteInt8) {
        const int weights_id = node->inputs->data[1];
        const auto& weights_tensor = context->tensors[weights_id];
        if ((weights_tensor.type == kTfLiteInt8 ||
             weights_tensor.type == kTfLiteUInt8) &&
            weights_tensor.quantization.type == kTfLiteAffineQuantization) {
          return true;
        }
      }
      return false;
    }
    case kTfLiteBuiltinTransposeConv: {
      // Transpose convolution has a different order of inputs:
      // 0: output_shape, 1: filter, 2: input, 3: bias.
      const int input_id = 2;
      const TfLiteType input_type = context->tensors[input_id].type;
      if (input_type == kTfLiteInt8) {
        return true;
      }
      return false;
    }
    case kTfLiteBuiltinSelect: {
      const auto value_type = context->tensors[node->inputs->data[1]].type;
      return value_type == kTfLiteInt8;
    }
    case kTfLiteBuiltinAdd:
    case kTfLiteBuiltinArgMax:
    case kTfLiteBuiltinArgMin:
    case kTfLiteBuiltinAveragePool2d:
    case kTfLiteBuiltinBatchToSpaceNd:
    case kTfLiteBuiltinConcatenation:
    case kTfLiteBuiltinEqual:
    case kTfLiteBuiltinExpandDims:
    case kTfLiteBuiltinGather:
    case kTfLiteBuiltinGreater:
    case kTfLiteBuiltinGreaterEqual:
    case kTfLiteBuiltinHardSwish:
    case kTfLiteBuiltinL2Normalization:
    case kTfLiteBuiltinLeakyRelu:
    case kTfLiteBuiltinLess:
    case kTfLiteBuiltinLessEqual:
    case kTfLiteBuiltinLogistic:
    case kTfLiteBuiltinMaximum:
    case kTfLiteBuiltinMaxPool2d:
    case kTfLiteBuiltinMean:
    case kTfLiteBuiltinMinimum:
    case kTfLiteBuiltinMul:
    case kTfLiteBuiltinNotEqual:
    case kTfLiteBuiltinPad:
    case kTfLiteBuiltinPadv2:
    case kTfLiteBuiltinPrelu:
    case kTfLiteBuiltinReduceMax:
    case kTfLiteBuiltinReduceMin:
    case kTfLiteBuiltinRelu:
    case kTfLiteBuiltinReluN1To1:
    case kTfLiteBuiltinRelu6:
    case kTfLiteBuiltinResizeBilinear:
    case kTfLiteBuiltinResizeNearestNeighbor:
    case kTfLiteBuiltinReshape:
    case kTfLiteBuiltinSlice:
    case kTfLiteBuiltinSoftmax:
    case kTfLiteBuiltinSpaceToBatchNd:
    case kTfLiteBuiltinSpaceToDepth:
    case kTfLiteBuiltinDepthToSpace:
    case kTfLiteBuiltinStridedSlice:
    case kTfLiteBuiltinSub:
    case kTfLiteBuiltinTanh:
    case kTfLiteBuiltinTile:
    case kTfLiteBuiltinTopkV2:
    case kTfLiteBuiltinTranspose: {
      return input_type == kTfLiteInt8;
    }
    default:
      return false;
  }
}
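// For example, an int8-quantized CONV_2D whose weight tensor is int8 with
// kTfLiteAffineQuantization returns true here; the delegate then feeds NNAPI
// the equivalent unsigned asymmetric representation (see
// ConvertTensorTypeToNNType below, which shifts the zero point by 128).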

constexpr int kLstmFullKernelInputSize = 24;
// The 20-input version is deprecated and kept only to
// support old models. The latest version of the LSTM Full Kernel
// is the one with 24 inputs.
constexpr int kLstmFullKernelNoOptionalParamsInputSize = 20;
constexpr int kLstmBasicKernelInputSize = 5;

inline bool isLstmBasicKernel(const TfLiteNode* node) {
  return node->inputs->size == kLstmBasicKernelInputSize;
}

inline bool isLstmFullKernel(const TfLiteNode* node) {
  return node->inputs->size == kLstmFullKernelInputSize ||
         node->inputs->size == kLstmFullKernelNoOptionalParamsInputSize;
}

bool IsMeanWithDifferentInputOutputQuantization(const TfLiteContext* context,
                                                const TfLiteNode* node) {
  const auto& input = context->tensors[node->inputs->data[0]];
  const auto& output = context->tensors[node->outputs->data[0]];
  return input.params.scale != output.params.scale ||
         input.params.zero_point != output.params.zero_point;
}

bool IsBroadcastBatchMatMul(const TfLiteContext* context,
                            const TfLiteNode* node) {
  const auto& input0 = context->tensors[node->inputs->data[0]];
  const auto& input1 = context->tensors[node->inputs->data[1]];
  if (input0.dims->size != input1.dims->size) {
    return true;
  }
  for (int i = 0; i < input0.dims->size - 2; i++) {
    if (input0.dims->data[i] != input1.dims->data[i]) {
      return true;
    }
  }
  return false;
}

bool IsHybridOperator(const TfLiteContext* context, int builtin_code,
                      const TfLiteNode* node) {
  switch (builtin_code) {
    case kTfLiteBuiltinConv2d:
    case kTfLiteBuiltinFullyConnected: {
      const int input_id = node->inputs->data[0];
      const int filter_id = node->inputs->data[1];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType filter_type = context->tensors[filter_id].type;
      return IsFloat(input_type) && IsQuantized(filter_type);
    }
    case kTfLiteBuiltinLstm: {
      const int input_id = node->inputs->data[0];
      // Input #1 is optional so use #2 to determine if hybrid.
      const int weights_id = node->inputs->data[2];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType weights_type = context->tensors[weights_id].type;
      return isLstmFullKernel(node) && IsFloat(input_type) &&
             IsQuantized(weights_type);
    }
    case kTfLiteBuiltinUnidirectionalSequenceLstm: {
      const int input_id = node->inputs->data[0];
      // Input #1 is optional so use #2 to determine if hybrid.
      const int weights_id = node->inputs->data[2];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType weights_type = context->tensors[weights_id].type;
      return IsFloat(input_type) && IsQuantized(weights_type);
    }
    case kTfLiteBuiltinBidirectionalSequenceLstm: {
      const int input_id = node->inputs->data[0];
      // Input #1 is optional so use #2 to determine if hybrid.
      const int weights_id = node->inputs->data[2];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType weights_type = context->tensors[weights_id].type;
      return IsFloat(input_type) && IsQuantized(weights_type);
    }
    case kTfLiteBuiltinUnidirectionalSequenceRnn: {
      const int input_id = node->inputs->data[0];
      const int weights_id = node->inputs->data[1];
      const TfLiteType input_type = context->tensors[input_id].type;
      const TfLiteType weights_type = context->tensors[weights_id].type;
      return IsFloat(input_type) && IsQuantized(weights_type);
    }
    default:
      return false;
  }
}

bool IsDequantizeConstFloat16(TfLiteContext* context, const TfLiteNode* node,
                              const TfLiteRegistration* registration) {
  return registration->builtin_code == kTfLiteBuiltinDequantize &&
         context->tensors[node->inputs->data[0]].type ==
             TfLiteType::kTfLiteFloat16 &&
         IsConstantTensor(&context->tensors[node->inputs->data[0]]);
}

bool IsDequantizeNonConstFloat16(TfLiteContext* context, const TfLiteNode* node,
                                 const TfLiteRegistration* registration) {
  return registration->builtin_code == kTfLiteBuiltinDequantize &&
         context->tensors[node->inputs->data[0]].type ==
             TfLiteType::kTfLiteFloat16 &&
         !IsConstantTensor(&context->tensors[node->inputs->data[0]]);
}

bool IsDensifyConstTensor(TfLiteContext* context, const TfLiteNode* node,
                          const TfLiteRegistration* registration) {
  return registration->builtin_code == kTfLiteBuiltinDensify &&
         IsConstantTensor(&context->tensors[node->inputs->data[0]]);
}

ANeuralNetworksOperandType ConvertTensorTypeToNNType(
    const TfLiteTensor* tensor, TfLiteType ann_type_equivalent,
    bool use_int8_asymm_signed) {
  int32_t nn_type = 0;
  float scale = 0.0f;
  int32_t zero_point = 0;
  switch (tensor->type) {
    case kTfLiteFloat32:
      nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
      break;
    case kTfLiteUInt8:
      nn_type = ann_type_equivalent == kTfLiteInt32
                    ? ANEURALNETWORKS_TENSOR_INT32
                    : ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
      scale = tensor->params.scale;
      zero_point = tensor->params.zero_point;
      if (scale == 0) {
        // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
        // with zero scale are not valid in NNAPI.
        scale = 1;
      }
      break;
    case kTfLiteInt8:
      scale = tensor->params.scale;
      zero_point = tensor->params.zero_point;
      if (use_int8_asymm_signed) {
        nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
      } else if (ann_type_equivalent == kTfLiteUInt8) {
        nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
        zero_point += 128;
      } else if (ann_type_equivalent == kTfLiteInt32) {
        nn_type = ANEURALNETWORKS_TENSOR_INT32;
        zero_point += 128;
      } else {
        nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
      }
      if (scale == 0) {
        // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
        // with zero scale are not valid in NNAPI.
        scale = 1;
      }
      break;
    case kTfLiteInt32:
      nn_type = ANEURALNETWORKS_TENSOR_INT32;
      scale = tensor->params.scale;
      zero_point = tensor->params.zero_point;
      break;
    case kTfLiteBool:
      nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
      break;
    case kTfLiteInt16:
      nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM;
      scale = tensor->params.scale;
      zero_point = tensor->params.zero_point;
      break;
    default:
      break;
  }
  uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size);
  uint32_t* tensor_dims = reinterpret_cast<uint32_t*>(tensor->dims->data);
  static uint32_t scalar_rank = 1;
  // treat scalar input as single cell tensor in NNAPI.
  if (tensor_rank == 0) {
    tensor_rank = scalar_rank;
    tensor_dims = &scalar_rank;
  }
  ANeuralNetworksOperandType nn_operand_type{
      .type = nn_type,
      .dimensionCount = tensor_rank,
      .dimensions = tensor_dims,
      .scale = scale,
      .zeroPoint = zero_point,
  };
  return nn_operand_type;
}
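// For example, an int8 tensor with scale 0.5 and zero point -3 mapped to its
// uint8 equivalent (ann_type_equivalent == kTfLiteUInt8) becomes an
// ANEURALNETWORKS_TENSOR_QUANT8_ASYMM operand with scale 0.5 and zero point
// 125 (-3 + 128).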

// NNAPI in API 31 hard-codes the preferred alignment/padding to 64 bytes.
constexpr size_t kDefaultByteAlignmentForNNAPI = 64;

static size_t GetNumPaddingBytes(size_t byte_size) {
  size_t num_padding_bytes = 0;
  if (byte_size % kDefaultByteAlignmentForNNAPI) {
    num_padding_bytes = kDefaultByteAlignmentForNNAPI -
                        (byte_size % kDefaultByteAlignmentForNNAPI);
  }
  return num_padding_bytes;
}
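// Worked example: GetNumPaddingBytes(100) == 28, since 100 % 64 == 36 and
// 64 - 36 == 28; GetNumPaddingBytes(128) == 0 because 128 is already a
// multiple of the 64-byte alignment.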

static size_t GetNNTensorSize(size_t tensor_size, bool allow_padding) {
  size_t padding_bytes = GetNumPaddingBytes(tensor_size);
  size_t nn_tensor_size = tensor_size;
  if (allow_padding) {
    nn_tensor_size += padding_bytes;
  }
  return nn_tensor_size;
}

// Returns the NNAPI device handle for the provided null-terminated device
// name. Returns kTfLiteError in case of any NNAPI error or if no device with
// the given name can be found.
TfLiteStatus GetDeviceHandle(const NnApi* nnapi, TfLiteContext* context,
                             const char* device_name_ptr,
                             ANeuralNetworksDevice** result, int* nnapi_errno) {
  if (!device_name_ptr) return kTfLiteError;
  *result = nullptr;
  std::string device_name(device_name_ptr);
  uint32_t num_devices = 0;
  nnapi->ANeuralNetworks_getDeviceCount(&num_devices);

  for (uint32_t i = 0; i < num_devices; i++) {
    ANeuralNetworksDevice* device = nullptr;
    const char* buffer = nullptr;
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context, nnapi->ANeuralNetworks_getDevice(i, &device),
        "Searching for target device", nnapi_errno);

    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context, nnapi->ANeuralNetworksDevice_getName(device, &buffer),
        "Searching for target device", nnapi_errno);

    if (device_name == buffer) {
      *result = device;
      return kTfLiteOk;
    }
  }

  TF_LITE_KERNEL_LOG(context,
                     "Could not find the specified NNAPI accelerator: %s. "
                     "Must be one of: {%s}.",
                     device_name_ptr,
                     nnapi::GetStringDeviceNamesList(nnapi).c_str());
  return kTfLiteError;
}

// Compute the hash of a TfLiteIntArray.
uint64_t GetHash(const TfLiteIntArray* int_array, uint64_t combine_with = 0) {
  constexpr auto kHashConst = 0x9e3779b97f4a7800ULL;
  uint64_t result = combine_with;
  for (auto i : TfLiteIntArrayView(int_array)) {
    result = result ^ (i + kHashConst + (result << 10) + (result >> 4));
  }
  return result;
}
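// The constant above is a 64-bit golden-ratio-style mixing constant and the
// XOR/shift update follows the familiar hash_combine pattern, so the result
// depends on both the element values and their order in the array.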

bool HasZeroes(TfLiteIntArrayView array) {
  for (auto value : array) {
    if (value == 0) {
      return true;
    }
  }
  return false;
}

// In SPLIT_V, it is legal to specify -1 in size_splits representing an unknown
// split size taking as many values as possible. This function computes and
// returns the actual value of this unknown size, or returns -1 if all split
// sizes are known. The caller is responsible for making sure the size_splits
// and axis tensor are constants.
int ComputeSplitVUnknownSplitSize(const TfLiteContext* context,
                                  const TfLiteNode* node) {
  const auto& input = context->tensors[node->inputs->data[0]];
  const auto& size_splits_tensor = context->tensors[node->inputs->data[1]];
  const auto& axis_tensor = context->tensors[node->inputs->data[2]];

  const auto* size_splits = size_splits_tensor.data.i32;
  int num_splits = size_splits_tensor.dims->data[0];
  bool has_unknown_split_size = false;
  int sum_of_known_split_sizes = 0;
  for (int i = 0; i < num_splits; i++) {
    if (size_splits[i] == -1) {
      has_unknown_split_size = true;
    } else {
      sum_of_known_split_sizes += size_splits[i];
    }
  }

  int axis = axis_tensor.data.i32[0];
  axis = axis < 0 ? axis + input.dims->size : axis;
  int total_size = input.dims->data[axis];
  return has_unknown_split_size ? total_size - sum_of_known_split_sizes : -1;
}
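// Worked example: for an input of shape [2, 9] split along axis 1 with
// size_splits == {2, -1, 3}, the unknown split resolves to 9 - (2 + 3) == 4;
// with size_splits == {2, 4, 3} (no -1 present) the function returns -1.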

// Bit mask for tensor flags.
enum {
  NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
  NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
  NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
  NN_TENSOR_FLAG_FORCE_PER_CHANNEL = 1U << 3,
  NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION = 1U << 4,
};

// Returns the feature level to target when delegating to the given devices.
// The feature level is the max of the ones supported by the devices or
// the current NNAPI runtime feature level if no device is present.
TfLiteStatus GetTargetFeatureLevel(
    TfLiteContext* context, const NnApi* nnapi,
    const std::vector<ANeuralNetworksDevice*>& device_handles,
    int* target_feature_level, int* nnapi_errno) {
  *target_feature_level = nnapi->nnapi_runtime_feature_level;
  int64_t devices_feature_level = -1;
  for (const auto* device_handle : device_handles) {
    int64_t curr_device_feature_level;
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context,
        nnapi->ANeuralNetworksDevice_getFeatureLevel(
            device_handle, &curr_device_feature_level),
        "Searching for target device", nnapi_errno);

    devices_feature_level =
        std::max(curr_device_feature_level, devices_feature_level);
  }

  if ((devices_feature_level > 0) &&
      // This second check is necessary since if the nnapi-reference device is
      // in the list of target devices the devices_feature_level value will be
      // 1000.
      (devices_feature_level < nnapi->nnapi_runtime_feature_level)) {
    TFLITE_LOG(TFLITE_LOG_INFO,
               "Changing NNAPI Feature Level %lld to "
               "supported by target devices: %lld",
               nnapi->nnapi_runtime_feature_level, devices_feature_level);

    *target_feature_level = devices_feature_level;
  }

  return kTfLiteOk;
}

// Returns true if this delegate is configured to use a specific set of
// devices. This will happen either if:
// - the accelerator_name option has been specified, or
// - the NNAPI CPU implementation has been explicitly disabled.
// If exclude_nnapi_reference is true this method will return false when the
// accelerator_name in the delegate options is equal to "nnapi-reference".
bool ShouldUseTargetDevices(StatefulNnApiDelegate::Options delegate_options,
                            const NnApi* nnapi,
                            bool exclude_nnapi_reference = false) {
  const char* device_name_ptr = delegate_options.accelerator_name;
  std::string nnapi_cpu("nnapi-reference");
  bool has_selected_accelerator = device_name_ptr != nullptr;
  if (exclude_nnapi_reference && has_selected_accelerator) {
    if (nnapi_cpu == device_name_ptr) return false;
  }
  return (delegate_options.disallow_nnapi_cpu &&
          nnapi->android_sdk_version >=
              delegate::nnapi::kMinSdkVersionForNNAPI12) ||
         has_selected_accelerator;
}

// Fills the given result vector with the list of devices the given delegate
// is referring to.
// There are three possible results:
// - An empty array (not the full list of available accelerators,
//   for efficiency reasons) if no accelerator is chosen and the
//   disallow_nnapi_cpu delegate option is false.
// - A single-element array with the target processor, if an accelerator name
//   is specified in the delegate options.
// - The full list of devices available on the device, minus the NNAPI
//   reference implementation, if the delegate option disallow_nnapi_cpu has
//   been specified.
TfLiteStatus GetTargetDevices(TfLiteContext* context, TfLiteDelegate* delegate,
                              const NnApi* nnapi, int* nnapi_errno,
                              std::vector<ANeuralNetworksDevice*>* result) {
  if (nnapi->android_sdk_version < delegate::nnapi::kMinSdkVersionForNNAPI12) {
    return kTfLiteError;
  }

  const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate);
  const char* device_name_ptr = delegate_options.accelerator_name;

  if (device_name_ptr != nullptr) {
    // User specified an accelerator to use.
    ANeuralNetworksDevice* nnapi_device = nullptr;
    TF_LITE_ENSURE_STATUS(GetDeviceHandle(nnapi, context, device_name_ptr,
                                          &nnapi_device, nnapi_errno));
    result->push_back(nnapi_device);
  } else if (delegate_options.disallow_nnapi_cpu) {
    std::string nnapi_cpu("nnapi-reference");
    uint32_t num_devices = 0;
    nnapi->ANeuralNetworks_getDeviceCount(&num_devices);

    for (uint32_t i = 0; i < num_devices; i++) {
      ANeuralNetworksDevice* device = nullptr;
      const char* buffer = nullptr;
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context, nnapi->ANeuralNetworks_getDevice(i, &device),
          "Getting list of available devices", nnapi_errno);
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context, nnapi->ANeuralNetworksDevice_getName(device, &buffer),
          "Getting list of available devices", nnapi_errno);
      if (nnapi_cpu != buffer) {
        result->push_back(device);
      }
    }
  }

  return kTfLiteOk;
}

// The context to be used with NnapiMappingUtilCInterface.
class NnapiMappingContext {
 public:
  // Next index of ann tensor
  int next_ann_tensor_index_ = 0;
  // Mapping from lite tensor index.
  std::vector<int> lite_tensor_to_ann_tensor_;
  // Mapping from lite index to the type to which the tensor must be converted
  // during the copying of the data to the memory allocated for NN API.
  // kTfLiteNoType means no conversion is needed.
  std::vector<int> index_to_type_conversion_;
  // Mapping from lite node index.
  std::vector<int> nnapi_to_tflite_op_mapping_;
};

}  // namespace

namespace delegate {
namespace nnapi {

#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
NNMemory::NNMemory(const NnApi* nnapi, const char* name, size_t size) {
  if (name && size > 0) {
    nnapi_ = nnapi;
    byte_size_ = size;
#ifdef __ANDROID__
    fd_ = nnapi_->ASharedMemory_create(name, size);
#else
    // For non-Android platforms ASharedMemory_create needs a unique name to
    // create a shared memory object (see nnapi_implementation.cc).
    char shm_name_buffer[L_tmpnam];
    if (tmpnam(shm_name_buffer) == nullptr) {
      shm_name_buffer[0] = '\0';
    }
    // tmpnam will produce a string containing slashes, but shm_open
    // won't accept that.
    shm_region_name_ = std::string(name) + std::string(shm_name_buffer);
    std::replace(shm_region_name_.begin(), shm_region_name_.end(), '/', '-');
    fd_ = nnapi_->ASharedMemory_create(shm_region_name_.c_str(), size);
#endif

    data_ptr_ = reinterpret_cast<uint8_t*>(
        mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
    nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE,
                                               fd_, 0, &nn_memory_handle_);
  }
}
#else
NNMemory::NNMemory(const NnApi* /*nnapi*/, const char* /*name*/,
                   size_t /*size*/)
    : nnapi_(nullptr) {}
#endif

NNMemory::~NNMemory() {
#ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
  if (data_ptr_) {
    munmap(data_ptr_, byte_size_);
  }
  if (nn_memory_handle_) {
    nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_);
  }
#ifdef __ANDROID__
  if (fd_ >= 0) close(fd_);
#else
  if (!shm_region_name_.empty()) shm_unlink(shm_region_name_.c_str());
#endif
#endif
}

class DequantizeMapping {
 public:
  int DequantizedAnnIndex(int ann_index, TfLiteType type) const {
    for (const auto& element : mapping_) {
      if (ann_index == std::get<0>(element) && type == std::get<1>(element)) {
        return std::get<2>(element);
      }
    }
    return -1;
  }

  void Add(int ann_index, TfLiteType type, int dequantized_ann_index) {
    // This assumes it is not already mapped.
    mapping_.emplace_back(ann_index, type, dequantized_ann_index);
  }

 private:
  // Each tuple specifies the ANN (quantized) tensor index, the desired
  // floating-point type and the matching ANN (dequantized) tensor index. This
  // could use a map but instead std::vector is used to keep code size lower.
  std::vector<std::tuple<int, TfLiteType, int>> mapping_;
};

// Abstract builder for building an op in the NN API graph. This handles
// the disparity between TFLite and NN API operand types. NN API has singular
// operands for both tensors and parameters, and TFLite separates the two.
class NNAPIOpBuilder {
 public:
  NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context,
                 NnapiMappingUtilCInterface* mapping_util,
                 DequantizeMapping* dequantize_mapping,
                 std::map<const MMAPAllocation*, ANeuralNetworksMemory*>*
                     allocation_mapping,
                 ANeuralNetworksModel* nn_model, int* nnapi_errno,
                 bool allow_dynamic_dimensions)
      : nnapi_(nnapi),
        context_(context),
        mapping_util_(mapping_util),
        dequantize_mapping_(dequantize_mapping),
        allocation_memory_mapping_(allocation_mapping),
        nn_model_(nn_model),
        nnapi_errno_(nnapi_errno),
        allow_dynamic_dimensions_(allow_dynamic_dimensions) {}

  TfLiteStatus AddScalarBoolOperand(bool value) {
    return AddScalarOperand<bool>(value, ANEURALNETWORKS_BOOL);
  }

  TfLiteStatus AddScalarInt32Operand(int32_t value) {
    return AddScalarOperand<int32_t>(value, ANEURALNETWORKS_INT32);
  }

  TfLiteStatus AddScalarFloat32Operand(float value) {
    return AddScalarOperand<float>(value, ANEURALNETWORKS_FLOAT32);
  }

  TfLiteStatus AddVectorInt32Operand(const int32_t* values,
                                     uint32_t num_values) {
    return AddVectorOperand<int32_t>(values, num_values,
                                     ANEURALNETWORKS_TENSOR_INT32,
                                     /*scale=*/0.f, /*zero_point=*/0);
  }

  TfLiteStatus AddVectorInt32Operand(const int32_t* values, uint32_t num_values,
                                     float scale, int32_t zero_point) {
    return AddVectorOperand<int32_t>(
        values, num_values, ANEURALNETWORKS_TENSOR_INT32, scale, zero_point);
  }

  TfLiteStatus AddVectorInt16Operand(const int16_t* values,
                                     uint32_t num_values) {
    return AddVectorOperand<int16_t>(values, num_values,
                                     ANEURALNETWORKS_TENSOR_QUANT16_SYMM,
                                     /*scale=*/1.f, /*zero_point=*/0);
  }

  TfLiteStatus AddVectorInt8Operand(const int8_t* values, uint32_t num_values) {
    return AddVectorOperand<int8_t>(values, num_values,
                                    ANEURALNETWORKS_TENSOR_QUANT8_SYMM,
                                    /*scale=*/1.f, /*zero_point=*/0);
  }

  TfLiteStatus AddVectorFloat32Operand(const float* values,
                                       uint32_t num_values) {
    return AddVectorOperand<float>(values, num_values,
                                   ANEURALNETWORKS_TENSOR_FLOAT32);
  }

  TfLiteStatus AddPoolingParams(void* data) {
    auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
    AddScalarInt32Operand(builtin->padding);
    AddScalarInt32Operand(builtin->stride_width);
    AddScalarInt32Operand(builtin->stride_height);
    AddScalarInt32Operand(builtin->filter_width);
    AddScalarInt32Operand(builtin->filter_height);
    AddScalarInt32Operand(builtin->activation);
    return kTfLiteOk;
  }

  TfLiteStatus AddTensorInput(int tensor_index, bool hybrid_op,
                              int tensor_flags = 0) {
    return AddTensor(tensor_index, hybrid_op, &augmented_inputs_, tensor_flags);
  }

  TfLiteStatus AddTensorOutput(int tensor_index, int tensor_flags = 0) {
    return AddTensor(tensor_index, /*hybrid_op=*/false, &augmented_outputs_,
                     tensor_flags);
  }

  TfLiteStatus AddAdditionalFloat32OutputTensor(uint32_t dimension_count) {
    std::vector<uint32_t> dims(dimension_count, 0);
    return AddFloat32OutputTensor(dimension_count, dims.data(), nullptr);
  }

  TfLiteStatus AddStateFloat32Tensor(int tensor_index,
                                     int* ann_tensor_index_out) {
    TfLiteTensor* tensor = &context_->tensors[tensor_index];
    return AddFloat32OutputTensor(
        tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
        ann_tensor_index_out);
  }

  TfLiteStatus AddStateInt16Tensor(int tensor_index,
                                   int* ann_tensor_index_out) {
    TfLiteTensor* tensor = &context_->tensors[tensor_index];
    return AddAdditionalOutputTensor(
        tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
        ANEURALNETWORKS_TENSOR_QUANT16_SYMM, tensor->params.scale,
        tensor->params.zero_point, ann_tensor_index_out);
  }

  TfLiteStatus AddStateInt8AsymTensor(int tensor_index,
                                      int* ann_tensor_index_out) {
    TfLiteTensor* tensor = &context_->tensors[tensor_index];
    return AddAdditionalOutputTensor(
        tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
        ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, tensor->params.scale,
        tensor->params.zero_point, ann_tensor_index_out);
  }

  // Add a constant tensor with a single element, intended for
  // broadcast-capable ops.
  TfLiteStatus AddSingleValueConstantTensor(float value, bool is_quantized) {
    if (!is_quantized) {
      return AddVectorFloat32Operand(&value, 1);
    } else {
      // In the case that we need to add a quantized tensor, set the value to
      // 64, the zero_point to 0, and adjust the scale accordingly.
      const uint8_t quant8_value = 64;
      return AddVectorOperand<uint8_t>(&quant8_value, 1,
                                       ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
                                       value / quant8_value, 0);
    }
  }
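  // For example, requesting value == 1.f/3.f as a quantized constant stores
  // the byte 64 with scale (1.f/3.f) / 64 and zero point 0, so it dequantizes
  // back to exactly 1/3.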

  // Calculate the scale and zero_point for an 8-bit unsigned tensor, given
  // float min and max. zero_point is clamped to [0, 255].
  TfLiteStatus CalculateQuantizationParams(float min, float max, float* scale,
                                           int* zero_point) {
    if (max < min) return kTfLiteError;
    *scale = (max - min) / 255.f;
    if (min > 0.f) {
      *zero_point = 0;
    } else if (max < 0.f) {
      *zero_point = 255;
    } else {
      *zero_point = (0.f - min) / (*scale);
    }
    return kTfLiteOk;
  }
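  // Worked example: min = -1.f, max = 3.f gives scale = 4/255 ~= 0.0157 and,
  // since the range straddles zero, zero_point = (0 - (-1)) / scale = 63.75,
  // which truncates to 63 when stored in the int output parameter.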

  // Lower hardswish according to the following equation:
  // hard_swish[x] = x * ReLU6(x + 3) / 6 == x * (Relu_N1_to_1(x/3) * 3 + 3) / 6
  // = 0.5x * Relu_N1_to_1(x/3) + 0.5x
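  // Worked example at x = 1: hard_swish(1) = 1 * ReLU6(4) / 6 = 2/3. The
  // lowering below computes s1 = Relu1(1 * 1/3) = 1/3, s2 = 1 * 0.5 = 0.5,
  // s3 = s1 * s2 = 1/6, and finally y = s3 + s2 = 2/3, matching the closed
  // form.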
  TfLiteStatus TransformHardSwishIntoSupportedOps(int lite_input_index,
                                                  int lite_output_index,
                                                  bool need_int8_conversion,
                                                  int lite_node_index) {
    const TfLiteTensor& tensor = context_->tensors[lite_input_index];
    float input_scale = tensor.params.scale;
    int input_zero_point = tensor.params.zero_point;
    float input_min = 0.f;
    float input_max = 0.f;
    int tensor_flags = 0;
    if (need_int8_conversion) {
      tensor_flags = tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION;
      input_zero_point += 128;
    }
    bool is_quantized = false;
    int nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
    if (tensor.type == kTfLiteInt8 || tensor.type == kTfLiteUInt8) {
      is_quantized = true;
      nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
      input_min = (0 - input_zero_point) * input_scale;
      input_max = (255 - input_zero_point) * input_scale;
    }

    // Stage1 : s1 = Relu1(x * 1/3)
    float s1_output_min = 0.f;
    float s1_output_max = 0.f;
    int s1_out_ann_index = 0;
    {
      float s1_output_scale = 0.f;
      int s1_output_zero_point = 0;
      if (is_quantized) {
        // clamp the output range to [-1, 1] if needed.
        s1_output_min = input_min / 3.f < -1.f ? -1.f : input_min / 3.f;
        s1_output_max = input_max / 3.f > 1.f ? 1.f : input_max / 3.f;
        CalculateQuantizationParams(s1_output_min, s1_output_max,
                                    &s1_output_scale, &s1_output_zero_point);
      }
      TF_LITE_ENSURE_OK(context_,
                        AddTensorInput(lite_input_index, false, tensor_flags));
      const float value3f = 1.f / 3.f;
      TF_LITE_ENSURE_OK(context_,
                        AddSingleValueConstantTensor(value3f, is_quantized));
      TF_LITE_ENSURE_OK(context_,
                        AddScalarInt32Operand(ANEURALNETWORKS_FUSED_RELU1));
      TF_LITE_ENSURE_OK(
          context_,
          AddAdditionalOutputTensor(
              tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
              nn_type, s1_output_scale, s1_output_zero_point,
              &s1_out_ann_index));
      TF_LITE_ENSURE_OK(
          context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
    }

    // Stage2 : s2 = x / 2
    float s2_output_min = input_min / 2.f;
    float s2_output_max = input_max / 2.f;
    int s2_out_ann_index = 0;
    {
      float s2_output_scale = input_scale / 2.0f;
      int s2_output_zero_point = input_zero_point;
      TF_LITE_ENSURE_OK(context_,
                        AddTensorInput(lite_input_index, false, tensor_flags));
      const float value2f = 0.5f;
      TF_LITE_ENSURE_OK(context_,
                        AddSingleValueConstantTensor(value2f, is_quantized));
      TF_LITE_ENSURE_OK(context_,
                        AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
      TF_LITE_ENSURE_OK(
          context_,
          AddAdditionalOutputTensor(
              tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
              nn_type, s2_output_scale, s2_output_zero_point,
              &s2_out_ann_index));
      TF_LITE_ENSURE_OK(
          context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
    }

    // Stage 3 : s3 = s1 * s2
    int s3_out_ann_index = 0;
    {
      augmented_inputs_.push_back(s1_out_ann_index);
      augmented_inputs_.push_back(s2_out_ann_index);
      TF_LITE_ENSURE_OK(context_,
                        AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
      float s3_output_scale = 0.f;
      int s3_output_zero_point = 0;
      if (is_quantized) {
        // the min for stage 3 is always 0.0f.
        float s3_output_min = 0.f;
        // the max for stage 3 is max(s1_min * s2_min, s1_max * s2_max).
        float s3_output_max =
            s1_output_max * s2_output_max > s1_output_min * s2_output_min
                ? s1_output_max * s2_output_max
                : s1_output_min * s2_output_min;
        CalculateQuantizationParams(s3_output_min, s3_output_max,
                                    &s3_output_scale, &s3_output_zero_point);
      }
      TF_LITE_ENSURE_OK(
          context_,
          AddAdditionalOutputTensor(
              tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
              nn_type, s3_output_scale, s3_output_zero_point,
              &s3_out_ann_index));
      TF_LITE_ENSURE_OK(
          context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
    }

    // Stage 4: y = s3 + s2
    {
      augmented_inputs_.push_back(s2_out_ann_index);
      augmented_inputs_.push_back(s3_out_ann_index);
      TF_LITE_ENSURE_OK(context_,
                        AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
      TF_LITE_ENSURE_OK(context_,
                        AddTensorOutput(lite_output_index, tensor_flags));
      TF_LITE_ENSURE_OK(
          context_, FinalizeAddOperation(ANEURALNETWORKS_ADD, lite_node_index));
    }

    return kTfLiteOk;
  }

  // Adds the operation to the model and maps the operation to the originating
  // TFLite one.
  TfLiteStatus AddOperationToModel(ANeuralNetworksOperationType type,
                                   uint32_t input_count, const uint32_t* inputs,
                                   uint32_t output_count,
                                   const uint32_t* outputs,
                                   int lite_node_index) {
    RETURN_TFLITE_ERROR_IF_NN_ERROR(
        context_,
        nnapi_->ANeuralNetworksModel_addOperation(
            nn_model_, type, input_count, inputs, output_count, outputs),
        "adding operation", nnapi_errno_);
    mapping_util_->AddNnapiToTfliteOpMapping(mapping_util_, lite_node_index);
    return kTfLiteOk;
  }
  // Adds a Dequantize operator and replaces the input tensor index with the
  // dequantized version. If the dequantized version of the tensor already
  // exists then it is not added again.
  TfLiteStatus AddDequantize(int nn_input_index, int lite_tensor_index,
                             TfLiteType dequantized_type, int lite_node_index) {
    const int ann_index =
        mapping_util_->TfLiteIndexToNnIndex(mapping_util_, lite_tensor_index);
    int dequantized_ann_index =
        dequantize_mapping_->DequantizedAnnIndex(ann_index, dequantized_type);

    if (dequantized_ann_index == -1) {
      // The dequantized version does not exist yet, it has to be added: a new
      // Dequantize operation is added, yielding a new tensor.
      const TfLiteTensor& tensor = context_->tensors[lite_tensor_index];
      ANeuralNetworksOperandType operand_type{
          ANEURALNETWORKS_TENSOR_FLOAT32,
          static_cast<uint32_t>(tensor.dims->size),
          reinterpret_cast<uint32_t*>(tensor.dims->data), 0.f, 0};
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context_,
          nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
          "adding operand", nnapi_errno_);
      dequantized_ann_index =
          mapping_util_->AddNewNonTensorOperand(mapping_util_);

      // Add Dequantize operation.
      const uint32_t dequantize_input[1] = {static_cast<uint32_t>(ann_index)};
      const uint32_t dequantize_output[1] = {
          static_cast<uint32_t>(dequantized_ann_index)};
      TF_LITE_ENSURE_OK(
          context_, AddOperationToModel(ANEURALNETWORKS_DEQUANTIZE,
                                        /*input_count=*/1, dequantize_input,
                                        /*output_count=*/1, dequantize_output,
                                        lite_node_index));
      dequantize_mapping_->Add(ann_index, dequantized_type,
                               dequantized_ann_index);
    }

    // The input for the original operation is modified so that the operation
    // now uses the dequantized tensor as input.
    augmented_inputs_[nn_input_index] = dequantized_ann_index;

    return kTfLiteOk;
  }

  // Add a RESHAPE op which reshapes an NNAPI intermediate output to the
  // dimensions of the TFLite output tensor.
  TfLiteStatus AppendReshape(int nn_input_index, int lite_out_tensor_index,
                             int lite_node_index) {
    augmented_inputs_.push_back(nn_input_index);
    auto& output_tensor = context_->tensors[lite_out_tensor_index];
    TF_LITE_ENSURE_STATUS(
        AddVectorInt32Operand(output_tensor.dims->data,
                              static_cast<uint32_t>(output_tensor.dims->size)));
    TF_LITE_ENSURE_OK(context_,
                      AddTensorOutput(lite_out_tensor_index,
                                      NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
    TF_LITE_ENSURE_STATUS(
        FinalizeAddOperation(ANEURALNETWORKS_RESHAPE, lite_node_index));
    return kTfLiteOk;
  }
  // Add an ADD op to requantize an NNAPI intermediate output to the scale and
  // zero point of the TFLite output tensor.
  TfLiteStatus AppendRequantize(int nn_input_index, int lite_out_tensor_index,
                                int lite_node_index, int tensor_flags = 0) {
    augmented_inputs_.push_back(nn_input_index);
    auto& output_tensor = context_->tensors[lite_out_tensor_index];

    // Create a zero vector with the same type as the output type. There is
    // only a single element in the vector, and it is broadcastable with any
    // tensor. Because the ADD output operand below carries the TFLite output
    // tensor's scale and zero point, adding this zero constant performs the
    // requantization.
    TF_LITE_ENSURE(context_, IsQuantized(output_tensor.type));
    bool need_int8_conversion = tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
    int nn_type = (output_tensor.type == kTfLiteUInt8 || need_int8_conversion)
                      ? ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
                      : ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
    int8_t zero = 0;
    TF_LITE_ENSURE_STATUS(AddVectorOperand(&zero, /*num_values=*/1, nn_type,
                                           /*scale=*/1.0f, /*zero_point=*/0));

    TF_LITE_ENSURE_STATUS(AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
    TF_LITE_ENSURE_STATUS(AddTensorOutput(lite_out_tensor_index, tensor_flags));
    TF_LITE_ENSURE_STATUS(
        FinalizeAddOperation(ANEURALNETWORKS_ADD, lite_node_index));
    return kTfLiteOk;
  }

  // Lower PACK into CONCAT + RESHAPE when possible.
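  // For example, packing four [2, 3] inputs along axis 0 becomes a
  // CONCATENATION along axis 0 producing an [8, 3] intermediate, followed by
  // a RESHAPE to the [4, 2, 3] output shape.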
TransformPackIntoSupportedOps(int lite_node_index,TfLiteNode * node,TfLiteRegistration * reg)1225   TfLiteStatus TransformPackIntoSupportedOps(int lite_node_index,
1226                                              TfLiteNode* node,
1227                                              TfLiteRegistration* reg) {
1228     // Add input tensors for CONCAT, and calculate the dimensions for the
1229     // output.
1230     int concat_output_ann_index = -1;
1231     TfLitePackParams* builtin =
1232         reinterpret_cast<TfLitePackParams*>(node->builtin_data);
1233     auto& input_tensor = context_->tensors[node->inputs->data[0]];
1234     int axis = builtin->axis < 0 ? input_tensor.dims->size + builtin->axis + 1
1235                                  : builtin->axis;
1236     TF_LITE_ENSURE(context_, axis < input_tensor.dims->size);
1237     uint32_t concat_dim_size = 0;
1238     for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
1239       const auto input_index = node->inputs->data[input_pos];
1240       concat_dim_size +=
1241           context_->tensors[node->inputs->data[input_pos]].dims->data[axis];
1242       TF_LITE_ENSURE_STATUS(
1243           AddTensorInput(input_index, /*hybrid_op=*/false,
1244                          NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1245     }
1246     TF_LITE_ENSURE_STATUS(AddScalarInt32Operand(axis));
1247     std::vector<uint32_t> concat_output_shape(input_tensor.dims->size, 0);
1248     for (int i = 0; i < concat_output_shape.size(); i++) {
1249       if (i == axis) {
1250         concat_output_shape[i] = concat_dim_size;
1251       } else {
1252         concat_output_shape[i] = input_tensor.dims->data[i];
1253       }
1254     }
1255     TF_LITE_ENSURE_STATUS(AddIntermediateOutputTensor(
1256         input_tensor.type, concat_output_shape.size(),
1257         concat_output_shape.data(), input_tensor.params.scale,
1258         input_tensor.params.zero_point, &concat_output_ann_index));
1259     TF_LITE_ENSURE_STATUS(
1260         FinalizeAddOperation(ANEURALNETWORKS_CONCATENATION, lite_node_index));
1261 
1262     // Reshape the output tensor
1263     TF_LITE_ENSURE_STATUS(AppendReshape(
1264         concat_output_ann_index, node->outputs->data[0], lite_node_index));
1265     return kTfLiteOk;
1266   }
1267 
1268   // Lower UNPACK into RESHAPE + SPLIT when possible.
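  // For example (illustrative shapes): unpacking a [2, 3, 4] input along
  // axis 1 into 3 outputs of shape [2, 4] is emitted as a RESHAPE to [2, 12]
  // (folding the unpacked axis into the one that follows it) and a SPLIT
  // along axis 1 into 3 pieces of shape [2, 4].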
1269   TfLiteStatus TransformUnpackIntoSupportedOps(int lite_node_index,
1270                                                TfLiteNode* node,
1271                                                TfLiteRegistration* reg) {
1272     auto& input_tensor = context_->tensors[node->inputs->data[0]];
1273 
1274     auto* builtin = reinterpret_cast<TfLiteUnpackParams*>(node->builtin_data);
1275     int axis = builtin->axis < 0 ? builtin->axis + input_tensor.dims->size
1276                                  : builtin->axis;
1277     TF_LITE_ENSURE(context_, axis >= 0);
1278     TF_LITE_ENSURE(context_, axis < (input_tensor.dims->size - 1));
1279     int num_splits = builtin->num;
1280     TF_LITE_ENSURE(context_, num_splits == input_tensor.dims->data[axis]);
1281     TF_LITE_ENSURE(context_, num_splits == node->outputs->size);
1282 
1283     // Step 1: RESHAPE
1284     std::vector<int32_t> intermediate_shape(input_tensor.dims->size - 1);
1285     std::copy(input_tensor.dims->data, input_tensor.dims->data + axis,
1286               intermediate_shape.begin());
1287     intermediate_shape[axis] =
1288         input_tensor.dims->data[axis] * input_tensor.dims->data[axis + 1];
1289     std::copy(input_tensor.dims->data + axis + 2,
1290               input_tensor.dims->data + input_tensor.dims->size,
1291               intermediate_shape.begin() + axis + 1);
1292 
1293     TF_LITE_ENSURE_STATUS(AddTensorInput(node->inputs->data[0],
1294                                          /*hybrid_op=*/false,
1295                                          NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1296     TF_LITE_ENSURE_STATUS(AddVectorInt32Operand(intermediate_shape.data(),
1297                                                 intermediate_shape.size()));
1298     int reshape_output_ann_index = -1;
1299     float scale = input_tensor.params.scale;
1300     // Quantized tensor with zero scale is not valid in NNAPI.
1301     if (IsQuantized(input_tensor.type) && scale == 0.0f) {
1302       scale = 1.0f;
1303     }
1304     TF_LITE_ENSURE_STATUS(AddIntermediateOutputTensor(
1305         input_tensor.type, intermediate_shape.size(),
1306         reinterpret_cast<uint32_t*>(intermediate_shape.data()), scale,
1307         input_tensor.params.zero_point, &reshape_output_ann_index));
1308     TF_LITE_ENSURE_STATUS(
1309         FinalizeAddOperation(ANEURALNETWORKS_RESHAPE, lite_node_index));
1310 
1311     // Step 2: SPLIT
1312     augmented_inputs_.push_back(reshape_output_ann_index);
1313     TF_LITE_ENSURE_STATUS(AddScalarInt32Operand(axis));
1314     TF_LITE_ENSURE_STATUS(AddScalarInt32Operand(num_splits));
1315     for (int i = 0; i < num_splits; i++) {
1316       int lite_output_index = node->outputs->data[i];
1317       TF_LITE_ENSURE_STATUS(AddTensorOutput(
1318           lite_output_index, NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1319     }
1320     TF_LITE_ENSURE_STATUS(
1321         FinalizeAddOperation(ANEURALNETWORKS_SPLIT, lite_node_index));
1322     return kTfLiteOk;
1323   }
1324 
1325   // Lower SPLIT_V into SLICEs.
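  // For example (illustrative shapes): splitting a [2, 9] input along axis 1
  // with size_splits = [2, -1, 4] resolves the -1 entry to 9 - 2 - 4 = 3 and
  // emits three SLICE ops with begins {0, 0}, {0, 2}, {0, 5} and sizes
  // {2, 2}, {2, 3}, {2, 4}.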
1326   TfLiteStatus TransformSplitVIntoSupportedOps(int lite_node_index,
1327                                                TfLiteNode* node,
1328                                                TfLiteRegistration* reg) {
1329     auto& input = context_->tensors[node->inputs->data[0]];
1330     int input_rank = input.dims->size;
1331 
1332     const auto& size_splits_tensor = context_->tensors[node->inputs->data[1]];
1333     const auto* size_splits = size_splits_tensor.data.i32;
1334     int num_splits = size_splits_tensor.dims->data[0];
1335     int axis = context_->tensors[node->inputs->data[2]].data.i32[0];
1336     axis = axis < 0 ? axis + input_rank : axis;
1337     TF_LITE_ENSURE(context_, axis >= 0);
1338     TF_LITE_ENSURE(context_, axis < input_rank);
1339     int unknown_split_size = ComputeSplitVUnknownSplitSize(context_, node);
1340 
1341     // Keep track of the start index of a slice.
1342     int slice_begin_index = 0;
1343     for (int split_index = 0; split_index < num_splits; split_index++) {
1344       int split_size = size_splits[split_index] == -1
1345                            ? unknown_split_size
1346                            : size_splits[split_index];
1347       TF_LITE_ENSURE(context_, split_size > 0);
1348 
1349       // Parameters of SLICE.
1350       std::vector<int> begin_indices(input_rank);
1351       std::vector<int> slice_sizes(input_rank);
1352       for (int i = 0; i < input_rank; i++) {
1353         if (i == axis) {
1354           // Take only the split size.
1355           begin_indices[i] = slice_begin_index;
1356           slice_sizes[i] = split_size;
1357         } else {
1358           // Take the full size.
1359           begin_indices[i] = 0;
1360           slice_sizes[i] = input.dims->data[i];
1361         }
1362       }
1363       slice_begin_index += split_size;
1364 
1365       // Build NNAPI SLICE inputs and output.
1366       TF_LITE_ENSURE_STATUS(AddTensorInput(
1367           node->inputs->data[0],
1368           /*hybrid_op=*/false, NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1369       TF_LITE_ENSURE_STATUS(
1370           AddVectorInt32Operand(begin_indices.data(), begin_indices.size()));
1371       TF_LITE_ENSURE_STATUS(
1372           AddVectorInt32Operand(slice_sizes.data(), slice_sizes.size()));
1373       int lite_output_index = node->outputs->data[split_index];
1374       TF_LITE_ENSURE_STATUS(AddTensorOutput(
1375           lite_output_index, NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1376 
1377       TF_LITE_ENSURE_STATUS(
1378           FinalizeAddOperation(ANEURALNETWORKS_SLICE, lite_node_index));
1379     }
1380     return kTfLiteOk;
1381   }
1382 
1383   // Lower SQUARED_DIFFERENCE into SUB and MUL.
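  // The decomposition is out = (lhs - rhs)^2: a SUB writes the difference to
  // an intermediate operand shaped like the output, then a MUL squares that
  // intermediate with itself. For quantized types the intermediate needs its
  // own scale and zero point, chosen below so that the symmetric range
  // [-sqrt(max_output), sqrt(max_output)] fits into the 8-bit range.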
1384   TfLiteStatus TransformSquaredDifferenceIntoSupportedOps(
1385       int lite_node_index, TfLiteNode* node, TfLiteRegistration* reg) {
1386     const TfLiteTensor& lhs = context_->tensors[node->inputs->data[0]];
1387     const TfLiteTensor& output = context_->tensors[node->outputs->data[0]];
1388 
1389     // Stage 1: diff = lhs - rhs
1390     int diff_out_ann_index = 0;
1391     {
1392       // For quantized data type, choose a proper scale and zero point based on
1393       // the output range.
1394       float max_output = 0.f;
1395       int diff_output_zero_point = 0;
1396       int diff_output_nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1397       switch (lhs.type) {
1398         case kTfLiteFloat32:
1399           diff_output_nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1400           break;
1401         case kTfLiteInt32:
1402           diff_output_nn_type = ANEURALNETWORKS_TENSOR_INT32;
1403           break;
1404         case kTfLiteUInt8:
1405           max_output = (255 - output.params.zero_point) * output.params.scale;
1406           diff_output_zero_point = 128;
1407           diff_output_nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1408           break;
1409         case kTfLiteInt8:
1410           max_output = (127 - output.params.zero_point) * output.params.scale;
1411           diff_output_zero_point = 0;
1412           diff_output_nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
1413           break;
1414         default:
1415           return kTfLiteError;
1416       }
1417       // Final output range: [0, max_output], and output = diff^2,
1418       // -> diff range: [-sqrt(max_output), sqrt(max_output)]
1419       // This range corresponds to [1, 255] for uint8 with zero_point = 128,
1420       // or [-127, 127] for int8 with zero_point = 0.
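      // Numeric example (hypothetical parameters): for an int8 output with
      // scale 0.004 and zero_point 0, max_output = 127 * 0.004 = 0.508 and
      // diff_output_scale = 2 * sqrt(0.508) / 254 ~= 0.0056.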
1421       float diff_output_scale = 2.0f * std::sqrt(max_output) / 254.0f;
1422 
1423       TF_LITE_ENSURE_OK(
1424           context_, AddTensorInput(node->inputs->data[0], /*hybrid_op=*/false,
1425                                    NN_TENSOR_FLAG_SCALAR_AS_TENSOR |
1426                                        NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1427       TF_LITE_ENSURE_OK(
1428           context_, AddTensorInput(node->inputs->data[1], /*hybrid_op=*/false,
1429                                    NN_TENSOR_FLAG_SCALAR_AS_TENSOR |
1430                                        NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1431       TF_LITE_ENSURE_OK(context_,
1432                         AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
1433       TF_LITE_ENSURE_OK(
1434           context_,
1435           AddAdditionalOutputTensor(
1436               output.dims->size, reinterpret_cast<uint32_t*>(output.dims->data),
1437               diff_output_nn_type, diff_output_scale, diff_output_zero_point,
1438               &diff_out_ann_index));
1439       TF_LITE_ENSURE_OK(
1440           context_, FinalizeAddOperation(ANEURALNETWORKS_SUB, lite_node_index));
1441     }
1442 
1443     // Stage 2: out = diff * diff
1444     {
1445       augmented_inputs_.push_back(diff_out_ann_index);
1446       augmented_inputs_.push_back(diff_out_ann_index);
1447       TF_LITE_ENSURE_OK(context_,
1448                         AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
1449       TF_LITE_ENSURE_OK(context_,
1450                         AddTensorOutput(node->outputs->data[0],
1451                                         NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1452       TF_LITE_ENSURE_OK(
1453           context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
1454     }
1455 
1456     return kTfLiteOk;
1457   }
1458 
1459   // Finish emitting the op (of type `type`) into the NN API.
1460   TfLiteStatus FinalizeAddOperation(ANeuralNetworksOperationType type,
1461                                     int lite_node_index) {
1462     // Actually add an NN API operation
1463     TF_LITE_ENSURE_OK(context_,
1464                       AddOperationToModel(
1465                           type, static_cast<uint32_t>(augmented_inputs_.size()),
1466                           augmented_inputs_.data(),
1467                           static_cast<uint32_t>(augmented_outputs_.size()),
1468                           augmented_outputs_.data(), lite_node_index));
1469     augmented_inputs_.clear();
1470     augmented_outputs_.clear();
1471     return kTfLiteOk;
1472   }
1473 
1474   TfLiteStatus AddSingleValueTensorAsScalarOperand(int tensor_index,
1475                                                    int nn_type) {
1476     const TfLiteTensor* tensor = &context_->tensors[tensor_index];
1477     TF_LITE_ENSURE_EQ(context_, NumElements(tensor), 1);
1478 
1479     ANeuralNetworksOperandType operand_type{.type = nn_type};
1480     RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1481         context_,
1482         nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1483         "adding operand", tensor, nnapi_errno_);
1484     int ann_tensor_index =
1485         mapping_util_->TfLiteIndexToNnIndex(mapping_util_, tensor_index);
1486     if (ann_tensor_index != -1) {
1487       augmented_inputs_.push_back(ann_tensor_index);
1488       return kTfLiteOk;
1489     }
1490     // Allocate a new tensor index
1491     ann_tensor_index =
1492         mapping_util_->AddNewNnTensorIndex(mapping_util_, tensor_index);
1493     augmented_inputs_.push_back(ann_tensor_index);
1494 
1495     const TfLiteType tensor_type = tensor->type;
1496     TfLiteType nn_type_equivalent;
1497     TF_LITE_ENSURE_OK(context_, GetEquivalentToANNType(context_, nn_type,
1498                                                        &nn_type_equivalent));
1499     if (tensor_type != nn_type_equivalent) {
1500       mapping_util_->AddTypeConversion(mapping_util_, tensor_index,
1501                                        nn_type_equivalent);
1502     }
1503     return kTfLiteOk;
1504   }
1505 
1506   template <typename T>
1507   TfLiteStatus AddNewInputConstantTensor(
1508       int32_t nn_type, TfLiteType type, const TfLiteIntArray* dims,
1509       const std::vector<T>& tensor_value,
1510       const TfLiteQuantizationParams& quant_params, int* tensor_index) {
1511     TF_LITE_ENSURE_OK(context_,
1512                       context_->AddTensors(context_, 1, tensor_index));
1513 
1514     TfLiteTensor* new_tensor = &context_->tensors[*tensor_index];
1515     new_tensor->type = type;
1516     new_tensor->allocation_type = kTfLiteDynamic;
1517     new_tensor->params = quant_params;
1518 
1519     // Not removing the new tensor in case of resizing errors since it will
1520     // be cleared by the context
1521     TF_LITE_ENSURE_OK(
1522         context_,
1523         context_->ResizeTensor(
1524             context_, new_tensor,
1525             // Resize Tensor takes ownership of the dims array passed as param
1526             TfLiteIntArrayCopy(dims)));
1527 
1528     memcpy(new_tensor->data.raw,
1529            reinterpret_cast<const char*>(tensor_value.data()),
1530            tensor_value.size() * sizeof(T));
1531 
1532     const uint32_t tensor_rank = static_cast<uint32_t>(dims->size);
1533     const uint32_t* tensor_dims = reinterpret_cast<const uint32_t*>(dims->data);
1534     ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims,
1535                                             quant_params.scale,
1536                                             quant_params.zero_point};
1537 
1538     const int ann_tensor_index =
1539         mapping_util_->AddDelegateGeneratedInputAnnTensorOperand(mapping_util_);
1540 
1541     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1542         context_,
1543         nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1544         "adding operand", nnapi_errno_);
1545 
1546     augmented_inputs_.push_back(ann_tensor_index);
1547 
1548     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1549         context_,
1550         nnapi_->ANeuralNetworksModel_setOperandValue(
1551             nn_model_, ann_tensor_index, new_tensor->data.raw,
1552             new_tensor->bytes),
1553         "setting new operand value", nnapi_errno_);
1554 
1555     return kTfLiteOk;
1556   }
1557 
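  // Convenience overload taking the dimensions as an initializer list. A
  // caller might, for instance, add a constant 1-D INT32 tensor holding an
  // axis value (illustrative usage; the variable names are hypothetical):
  //
  //   int new_tensor_index = -1;
  //   TF_LITE_ENSURE_STATUS(AddNewInputConstantTensor(
  //       ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {1},
  //       std::vector<int32_t>{axis}, TfLiteQuantizationParams{},
  //       &new_tensor_index));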
1558   template <typename T>
1559   TfLiteStatus AddNewInputConstantTensor(
1560       int32_t nn_type, TfLiteType type, std::initializer_list<int> dims,
1561       const std::vector<T>& tensor_value,
1562       const TfLiteQuantizationParams& quant_params, int* tensor_index) {
1563     TfLiteIntArray* dim_array = TfLiteIntArrayCreate(dims.size());
1564     dim_array->size = dims.size();
1565     std::copy(dims.begin(), dims.end(), dim_array->data);
1566 
1567     const auto result = AddNewInputConstantTensor(
1568         nn_type, type, dim_array, tensor_value, quant_params, tensor_index);
1569     TfLiteIntArrayFree(dim_array);
1570     return result;
1571   }
1572 
1573   TfLiteStatus AddIntermediateOutputTensor(TfLiteType tfl_type,
1574                                            uint32_t dimension_count,
1575                                            const uint32_t* dimension_data,
1576                                            float scale, int32_t zero_point,
1577                                            int* ann_index_out,
1578                                            bool need_int8_conversion = false) {
1579     int32_t nn_type;
1580     switch (tfl_type) {
1581       case kTfLiteFloat32:
1582         nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1583         break;
1584       case kTfLiteInt8:
1585         nn_type = need_int8_conversion
1586                       ? ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
1587                       : ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
1588         break;
1589       case kTfLiteUInt8:
1590         nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1591         break;
1592       default:
1593         return kTfLiteError;
1594     }
1595     if (need_int8_conversion) {
1596       zero_point += 128;
1597     }
1598     TF_LITE_ENSURE_STATUS(
1599         AddAdditionalOutputTensor(dimension_count, dimension_data, nn_type,
1600                                   scale, zero_point, ann_index_out));
1601     return kTfLiteOk;
1602   }
1603 
1604   void ClearInputOuputLists() {
1605     augmented_inputs_.clear();
1606     augmented_outputs_.clear();
1607   }
1608 
1609  private:
1610   // Returns a TF Lite type which has the same memory representation as a
1611   // provided NN API type.
1612   TfLiteStatus GetEquivalentToANNType(TfLiteContext* context, int nn_type,
1613                                       TfLiteType* type) {
1614     switch (nn_type) {
1615       case ANEURALNETWORKS_INT32:
1616         *type = kTfLiteInt32;
1617         return kTfLiteOk;
1618       case ANEURALNETWORKS_FLOAT32:
1619         *type = kTfLiteFloat32;
1620         return kTfLiteOk;
1621       default:
1622         TF_LITE_KERNEL_LOG(context,
1623                            "NN API Delegate: Can't get an equivalent TF Lite "
1624                            "type for provided NN API type: %d.\n",
1625                            nn_type);
1626         return kTfLiteError;
1627     }
1628   }
1629 
1630   template <typename T>
1631   TfLiteStatus AddScalarOperand(T value, int32_t nn_type) {
1632     ANeuralNetworksOperandType operand_type{.type = nn_type};
1633     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1634         context_,
1635         nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1636         "adding operand", nnapi_errno_);
1637     const int ann_index = mapping_util_->AddNewNonTensorOperand(mapping_util_);
1638     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1639         context_,
1640         nnapi_->ANeuralNetworksModel_setOperandValue(nn_model_, ann_index,
1641                                                      &value, sizeof(T)),
1642         "setting new operand value", nnapi_errno_);
1643     augmented_inputs_.push_back(ann_index);
1644     return kTfLiteOk;
1645   }
1646 
1647   template <typename T>
1648   TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
1649                                 int32_t nn_type, float scale,
1650                                 int32_t zero_point) {
1651     ANeuralNetworksOperandType operand_type{.type = nn_type,
1652                                             .dimensionCount = 1,
1653                                             .dimensions = &num_values,
1654                                             .scale = scale,
1655                                             .zeroPoint = zero_point};
1656 
1657     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1658         context_,
1659         nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1660         "adding operand", nnapi_errno_);
1661 
1662     const int ann_index = mapping_util_->AddNewNonTensorOperand(mapping_util_);
1663     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1664         context_,
1665         nnapi_->ANeuralNetworksModel_setOperandValue(
1666             nn_model_, ann_index, values, sizeof(T) * num_values),
1667         "setting new operand value", nnapi_errno_);
1668     augmented_inputs_.push_back(ann_index);
1669     return kTfLiteOk;
1670   }
1671 
1672   template <typename T>
1673   TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
1674                                 int32_t nn_type) {
1675     return AddVectorOperand(values, num_values, nn_type, /*scale=*/0.f,
1676                             /*zero_point=*/0);
1677   }
1678 
1679   TfLiteStatus AddFloat32OutputTensor(uint32_t dimension_count,
1680                                       const uint32_t* dimension_data,
1681                                       int* ann_index_out) {
1682     return AddAdditionalOutputTensor(
1683         dimension_count, dimension_data, ANEURALNETWORKS_TENSOR_FLOAT32,
1684         /*scale=*/0.f, /*zero_point=*/0, ann_index_out);
1685   }
1686 
1687   TfLiteStatus AddAdditionalOutputTensor(uint32_t dimension_count,
1688                                          const uint32_t* dimension_data,
1689                                          int32_t nn_type, float scale,
1690                                          int32_t zero_point,
1691                                          int* ann_index_out) {
1692     ANeuralNetworksOperandType operand_type{
1693         .type = nn_type,
1694         .dimensionCount = dimension_count,
1695         .dimensions = dimension_data,
1696         .scale = scale,
1697         .zeroPoint = zero_point,
1698     };
1699     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1700         context_,
1701         nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1702         "adding operand", nnapi_errno_);
1703     const int ann_index = mapping_util_->AddNewNonTensorOperand(mapping_util_);
1704     augmented_outputs_.push_back(ann_index);
1705     if (ann_index_out) *ann_index_out = ann_index;
1706     return kTfLiteOk;
1707   }
1708 
1709   // Adds a new NN API tensor that shadows the TF Lite tensor `tensor_index`.
1710   // The NN API tensor index corresponding to the created tensor is appended
1711   // to `indices`. If another caller previously created an NN API tensor for
1712   // `tensor_index`, the existing index is reused and appended instead.
1713   TfLiteStatus AddTensor(int tensor_index, bool hybrid_op,
1714                          std::vector<uint32_t>* indices, int tensor_flags = 0) {
1715     const bool scalar_as_tensor =
1716         tensor_flags & NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
1717     const bool need_int8_conversion =
1718         tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
1719     const bool use_int8_asymm_signed =
1720         tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
1721     const bool force_per_channel =
1722         tensor_flags & NN_TENSOR_FLAG_FORCE_PER_CHANNEL;
1723     const bool need_half2float_conversion =
1724         tensor_flags & NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION;
1725 
1726     int ann_tensor_index =
1727         mapping_util_->TfLiteIndexToNnIndex(mapping_util_, tensor_index);
1728     if (ann_tensor_index != -1) {
1729       indices->push_back(ann_tensor_index);
1730       return kTfLiteOk;
1731     }
1732     // Allocate a new tensor index
1733     ann_tensor_index =
1734         mapping_util_->AddNewNnTensorIndex(mapping_util_, tensor_index);
1735 
1736     // Parameters needed for new type.
1737     int32_t nn_type = 0;
1738     float scale = 0.0f;
1739     int32_t zeroPoint = 0;
1740     ANeuralNetworksSymmPerChannelQuantParams ann_perchannel_params;
1741     TfLiteTensor* tensor = &context_->tensors[tensor_index];
1742     TfLiteType tensor_type = tensor->type;
1743     if (hybrid_op && (tensor_type == kTfLiteUInt8)) {
1744       // For legacy reasons, UINT8 weights in hybrid operators are actually INT8
1745       // values and should be interpreted as such.
1746       tensor_type = kTfLiteInt8;
1747     }
1748     switch (tensor_type) {
1749       case kTfLiteNoType:
1750         // Tensors added during initialization of Ops don't have a type yet and
1751         // should not be registered with the NNAPI.
1752         indices->push_back(-1);
1753         return kTfLiteOk;
1754       case kTfLiteFloat32:
1755         nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1756         break;
1757       case kTfLiteFloat16:
1758         nn_type = ANEURALNETWORKS_TENSOR_FLOAT16;
1759         if (need_half2float_conversion) {
1760           nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1761           mapping_util_->AddTypeConversion(mapping_util_, tensor_index,
1762                                            kTfLiteFloat32);
1763         }
1764         break;
1765       case kTfLiteUInt8:
1766         nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1767         scale = tensor->params.scale;
1768         zeroPoint = tensor->params.zero_point;
1769         if (scale == 0) {
1770           // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with zero scale is not valid in
1771           // NNAPI.
1772           scale = 1;
1773         }
1774         break;
1775       case kTfLiteInt8:
1776         // If explicit int8 conversion is needed, we still need
1777         // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type.
1778         if (use_int8_asymm_signed) {
1779           nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
1780         } else if (need_int8_conversion) {
1781           nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1782         } else {
1783           nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
1784         }
1785         scale = tensor->params.scale;
1786         zeroPoint = tensor->params.zero_point;
1787         if (tensor->quantization.type == kTfLiteAffineQuantization) {
1788           TfLiteAffineQuantization* quantization_params =
1789               static_cast<TfLiteAffineQuantization*>(
1790                   tensor->quantization.params);
1791           if (quantization_params->scale->size > 1 || force_per_channel) {
1792             // Set up per-channel quantization.
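            // For example (illustrative values): a conv filter quantized per
            // output channel with 16 channels and quantized_dimension 0 gives
            // channelDim = 0 and scaleCount = 16; the operand-level scale and
            // zeroPoint are left at 0, as NNAPI expects for
            // QUANT8_SYMM_PER_CHANNEL operands.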
1793             ann_perchannel_params = {
1794                 .channelDim = static_cast<uint32_t>(
1795                     quantization_params->quantized_dimension),
1796                 .scaleCount =
1797                     static_cast<uint32_t>(quantization_params->scale->size),
1798                 .scales = quantization_params->scale->data,
1799             };
1800             nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL;
1801             scale = 0.0f;
1802             zeroPoint = 0;
1803           } else if (quantization_params->scale->size == 1) {
1804             scale = quantization_params->scale->data[0];
1805             zeroPoint = quantization_params->zero_point->data[0];
1806           }
1807         }
1808         if (nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1809           if (need_int8_conversion) {
1810             zeroPoint += 128;
1811             mapping_util_->AddTypeConversion(mapping_util_, tensor_index,
1812                                              kTfLiteUInt8);
1813           }
1814           if (scale == 0) {
1815             // QUANT8 tensors with zero scale are not valid in NNAPI.
1816             scale = 1;
1817           }
1818         }
1819         break;
1820       case kTfLiteInt32:
1821         nn_type = ANEURALNETWORKS_TENSOR_INT32;
1822         scale = tensor->params.scale;
1823         zeroPoint = tensor->params.zero_point;
1824         break;
1825       case kTfLiteBool:
1826         nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
1827         break;
1828       case kTfLiteInt16:
1829         nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM;
1830         scale = tensor->params.scale;
1831         zeroPoint = tensor->params.zero_point;
1832         break;
1833       default:
1834         context_->ReportError(
1835             context_, "Failed to add NN API tensor: type %s is not supported.",
1836             TfLiteTypeGetName(tensor_type));
1837         return kTfLiteError;
1838     }
1839     bool has_unspecified_dimensions = ::tflite::HasUnspecifiedDimension(tensor);
1840     uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size);
1841     std::vector<uint32_t> dims_unspecified(tensor_rank, 0);
1842     if (has_unspecified_dimensions) {
1843       for (int i = 0; i < tensor->dims_signature->size; i++) {
1844         dims_unspecified[i] = tensor->dims_signature->data[i] == -1
1845                                   ? 0
1846                                   : tensor->dims_signature->data[i];
1847       }
1848     }
1849     uint32_t* tensor_dims =
1850         has_unspecified_dimensions && allow_dynamic_dimensions_
1851             ? dims_unspecified.data()
1852             : reinterpret_cast<uint32_t*>(tensor->dims->data);
1853     if (scalar_as_tensor && tensor_rank == 0) {
1854       // Use rank 1, shape {1} operand for TFLite scalar tensors.
1855       tensor_rank = 1;
1856       tensor_dims = &tensor_rank;
1857     }
1858     if (tensor_rank == 0) {
1859       // If the tensor_rank is 0, the dimensions pointer must be nullptr.
1860       tensor_dims = nullptr;
1861     }
1862 
1863     ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims,
1864                                             scale, zeroPoint};
1865     RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1866         context_,
1867         nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1868         "adding operand", tensor, nnapi_errno_);
1869 
1870     if (nn_type == ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1871       RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1872           context_,
1873           nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
1874               nn_model_, ann_tensor_index, &ann_perchannel_params),
1875           "setting new operand per channel quantization params", tensor,
1876           nnapi_errno_);
1877     }
1878     if (tensor->allocation_type == kTfLiteMmapRo) {
1879       if (IsQuantized(tensor_type) && need_int8_conversion &&
1880           nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1881         // We need to add a tensor and convert the weights into uint8.
1882         // Currently this is only needed for fully_connected. The new_tensor is
1883         // needed for lifetime management for the converted weights.
1884         int new_tensor_index = -1;
1885         TF_LITE_ENSURE_OK(context_,
1886                           context_->AddTensors(context_, 1, &new_tensor_index));
1887         TfLiteTensor* new_tensor = &context_->tensors[new_tensor_index];
1888         new_tensor->type = kTfLiteUInt8;
1889         new_tensor->allocation_type = kTfLiteDynamic;
1890         new_tensor->params.scale = scale;
1891         new_tensor->params.zero_point = zeroPoint;
1892         // Not removing the new tensor in case of resizing errors since it will
1893         // be cleared by the context
1894         TF_LITE_ENSURE_OK(
1895             context_, context_->ResizeTensor(context_, new_tensor,
1896                                              // Resize Tensor takes ownership of
1897                                              // the dims array passed as param
1898                                              TfLiteIntArrayCopy(tensor->dims)));
1899         // Convert each int8 value into the corresponding uint8 value.
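        // For example, an int8 value of -5 becomes uint8 123 (-5 + 128),
        // matching the +128 zero-point shift applied above.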
1900         const auto num_elements = NumElements(tensor);
1901         for (int i = 0; i < num_elements; ++i) {
1902           new_tensor->data.uint8[i] = static_cast<const uint8_t>(
1903               static_cast<int32_t>(tensor->data.int8[i]) + 128);
1904         }
1905         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1906             context_,
1907             nnapi_->ANeuralNetworksModel_setOperandValue(
1908                 nn_model_, ann_tensor_index, new_tensor->data.raw,
1909                 new_tensor->bytes),
1910             "setting new operand value", tensor, nnapi_errno_);
1911       } else if (tensor_type == kTfLiteFloat16 && need_half2float_conversion) {
1912         // We need to convert the constant fp16 weights to fp32. The new_tensor
1913         // is needed for lifetime management for the converted weights.
1914         int new_tensor_index = -1;
1915         TF_LITE_ENSURE_OK(context_,
1916                           context_->AddTensors(context_, 1, &new_tensor_index));
1917         TfLiteTensor* new_tensor = &context_->tensors[new_tensor_index];
1918         new_tensor->type = kTfLiteFloat32;
1919         new_tensor->allocation_type = kTfLiteDynamic;
1920         // Not removing the new tensor in case of resizing errors since it will
1921         // be cleared by the context
1922         TF_LITE_ENSURE_OK(
1923             context_, context_->ResizeTensor(context_, new_tensor,
1924                                              // Resize Tensor takes ownership of
1925                                              // the dims array passed as param
1926                                              TfLiteIntArrayCopy(tensor->dims)));
1927         // Convert each fp16 value into the corresponding fp32 value.
1928         const auto num_elements = NumElements(tensor);
1929         for (int i = 0; i < num_elements; ++i) {
1930           new_tensor->data.f[i] = fp16_ieee_to_fp32_value(
1931               reinterpret_cast<uint16_t*>(tensor->data.data)[i]);
1932         }
1933         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1934             context_,
1935             nnapi_->ANeuralNetworksModel_setOperandValue(
1936                 nn_model_, ann_tensor_index, new_tensor->data.data,
1937                 new_tensor->bytes),
1938             "setting new operand value", tensor, nnapi_errno_);
1939 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
1940       } else if (tensor->allocation &&
1941                  static_cast<const Allocation*>(tensor->allocation)->type() ==
1942                      Allocation::Type::kMMap) {
1943         const MMAPAllocation* mmap_alloc =
1944             static_cast<const MMAPAllocation*>(tensor->allocation);
1945         if (allocation_memory_mapping_->count(mmap_alloc) == 0) {
1946           ANeuralNetworksMemory* ann_memory_handle = nullptr;
1947           nnapi_->ANeuralNetworksMemory_createFromFd(
1948               mmap_alloc->bytes(), PROT_READ, mmap_alloc->fd(), 0,
1949               &ann_memory_handle);
1950           allocation_memory_mapping_->insert(
1951               std::make_pair(mmap_alloc, ann_memory_handle));
1952         }
1953         ANeuralNetworksMemory* ann_memory_handle =
1954             allocation_memory_mapping_->at(mmap_alloc);
1955         // Compute the offset to the base pointer of the MMAPAllocation.
1956         auto offset = reinterpret_cast<const uint8_t*>(tensor->data.raw) -
1957                       reinterpret_cast<const uint8_t*>(mmap_alloc->base());
1958         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1959             context_,
1960             nnapi_->ANeuralNetworksModel_setOperandValueFromMemory(
1961                 nn_model_, ann_tensor_index, ann_memory_handle, offset,
1962                 tensor->bytes),
1963             "setting new operand value from memory", tensor, nnapi_errno_);
1964 #endif
1965       } else {
1966         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1967             context_,
1968             nnapi_->ANeuralNetworksModel_setOperandValue(
1969                 nn_model_, ann_tensor_index, tensor->data.data, tensor->bytes),
1970             "setting new operand value", tensor, nnapi_errno_);
1971       }
1972     }
1973     indices->push_back(ann_tensor_index);
1974     return kTfLiteOk;
1975   }
1976 
1977   // Access to NNAPI.
1978   const NnApi* const nnapi_;
1979 
1980   // TfLiteContext for error handling.
1981   TfLiteContext* const context_;
1982 
1983   // Tracks relationship between indices.
1984   NnapiMappingUtilCInterface* const mapping_util_;
1985 
1986   // Keeps mapping of ANN quantized tensor and float data type to equivalent
1987   // dequantized ANN tensor. For example, tensor #4 (UINT8) + FLOAT32 could map
1988   // to tensor #10 (FLOAT32) because a DEQUANTIZE operator was added to convert
1989   // tensor #4 to a FLOAT32 tensor.
1990   DequantizeMapping* const dequantize_mapping_;
1991 
1992   std::map<const MMAPAllocation*, ANeuralNetworksMemory*>* const
1993       allocation_memory_mapping_;
1994 
1995   // The NNAPI model.
1996   ANeuralNetworksModel* const nn_model_;
1997 
1998   // Inputs and outputs for the current op. These are augmented in the sense
1999   // that NN API uses operands for all arguments, not just tensors, unlike
2000   // TensorFlow Lite.
2001   std::vector<uint32_t> augmented_inputs_;
2002   std::vector<uint32_t> augmented_outputs_;
2003 
2004   // Return status code of the latest NNAPI call.
2005   int* nnapi_errno_;
2006 
2007   // Whether to allow dynamic batch size without re-compilation.
2008   bool allow_dynamic_dimensions_;
2009 };  // class NNAPIOpBuilder
2010 
2011 namespace {
2012 struct OpValidationContext {
2013   bool is_valid;
2014   std::vector<NNAPIValidationFailure>* validation_failures;
2015 };
2016 
2017 #define EXPECT_INPUT_TYPE_IN(actual_type, ...)                    \
2018   ExpectTypeIn(actual_type, {__VA_ARGS__},                        \
2019                NNAPIValidationFailureType::kUnsupportedInputType, \
2020                "Input type not in expected list " #__VA_ARGS__, &val_ctx)
2021 
2022 inline void AddValidationFailure(NNAPIValidationFailureType failure_type,
2023                                  const char* message,
2024                                  OpValidationContext* val_ctx) {
2025   val_ctx->is_valid = false;
2026 
2027 #ifdef NNAPI_VERBOSE_VALIDATION
2028   if (val_ctx->validation_failures) {
2029     val_ctx->validation_failures->push_back({failure_type, message});
2030   }
2031 #endif
2032 }
2033 
2034 template <typename... Args>
2035 inline void AddValidationFailureFmt(OpValidationContext* val_ctx,
2036                                     NNAPIValidationFailureType failure_type,
2037                                     const char* message_fmt, Args... args) {
2038   val_ctx->is_valid = false;
2039 #ifdef NNAPI_VERBOSE_VALIDATION
2040   if (val_ctx->validation_failures) {
2041     size_t req_buf_size = snprintf(nullptr, 0, message_fmt, args...) + 1;
2042     std::unique_ptr<char[]> tmp_buf(new char[req_buf_size]);
2043     snprintf(tmp_buf.get(), req_buf_size, message_fmt, args...);
2044 
2045     val_ctx->validation_failures->push_back({failure_type, tmp_buf.get()});
2046   }
2047 #endif
2048 }
2049 
2050 inline bool Expect(bool condition, NNAPIValidationFailureType failure_type,
2051                    const char* message, OpValidationContext* val_ctx) {
2052   if (!condition) {
2053     AddValidationFailure(failure_type, message, val_ctx);
2054     return false;
2055   }
2056   return true;
2057 }
2058 
2059 template <typename... Args>
2060 inline bool ExpectFmt(bool condition, OpValidationContext* val_ctx,
2061                       NNAPIValidationFailureType failure_type,
2062                       const char* message_fmt, Args... args) {
2063   if (!condition) {
2064     AddValidationFailureFmt(val_ctx, failure_type, message_fmt, args...);
2065     return false;
2066   }
2067   return true;
2068 }
2069 
2070 inline bool ExpectTypeIn(TfLiteType actual_type,
2071                          std::initializer_list<TfLiteType> allowed_types,
2072                          NNAPIValidationFailureType failure_type,
2073                          const char* msg, OpValidationContext* val_ctx) {
2074   return Expect(std::find(allowed_types.begin(), allowed_types.end(),
2075                           actual_type) != allowed_types.end(),
2076                 failure_type, msg, val_ctx);
2077 }
2078 
2079 inline bool ExpectMinAndroidSdkVersion(int curr_version, int min_version,
2080                                        OpValidationContext* val_ctx) {
2081   return ExpectFmt(curr_version >= min_version, val_ctx,
2082                    NNAPIValidationFailureType::kUnsupportedAndroidVersion,
2083                    "Android sdk version less than %d", min_version);
2084 }
2085 
2086 inline bool ExpectMaxOpVersion(int curr_version, int max_version,
2087                                OpValidationContext* val_ctx) {
2088   return ExpectFmt(curr_version <= max_version, val_ctx,
2089                    NNAPIValidationFailureType::kUnsupportedOperatorVersion,
2090                    "OP Version higher than %d", max_version);
2091 }
2092 
2093 inline bool ExpectOpVersion(int curr_version, int max_version,
2094                             OpValidationContext* val_ctx) {
2095   return ExpectFmt(curr_version <= max_version, val_ctx,
2096                    NNAPIValidationFailureType::kUnsupportedOperatorVersion,
2097                    "OP Version different from %d", max_version);
2098 }
2099 
2100 inline bool ExpectIsFloatOperator(const TfLiteContext* context,
2101                                   const TfLiteNode* node,
2102                                   OpValidationContext* val_ctx) {
2103   const auto input_type = context->tensors[node->inputs->data[0]].type;
2104   return Expect(IsFloat(input_type),
2105                 NNAPIValidationFailureType::kUnsupportedInputType,
2106                 "Input should be Float", val_ctx);
2107 }
2108 
2109 bool ExpectIsFloatOrUint8Operator(const TfLiteContext* context,
2110                                   const TfLiteNode* node,
2111                                   OpValidationContext* val_ctx) {
2112   const auto input_type = context->tensors[node->inputs->data[0]].type;
2113   return Expect(IsFloatOrUInt8(input_type),
2114                 NNAPIValidationFailureType::kUnsupportedInputType,
2115                 "Input should be Float or UINT8", val_ctx);
2116 }
2117 
2118 bool ExpectIsFloatOrQuant8Operator(const TfLiteContext* context,
2119                                    const TfLiteNode* node,
2120                                    OpValidationContext* val_ctx) {
2121   const auto input_type = context->tensors[node->inputs->data[0]].type;
2122   return Expect(IsFloatOrQuantized(input_type),
2123                 NNAPIValidationFailureType::kUnsupportedInputType,
2124                 "Input should be Float or Quant8", val_ctx);
2125 }
2126 
2127 bool ExpectIsFloatOrInt32Operator(const TfLiteContext* context,
2128                                   const TfLiteNode* node,
2129                                   OpValidationContext* val_ctx) {
2130   const auto input_type = context->tensors[node->inputs->data[0]].type;
2131   return Expect(IsFloatOrInt32(input_type),
2132                 NNAPIValidationFailureType::kUnsupportedInputType,
2133                 "Input should be Float or Int32", val_ctx);
2134 }
2135 
2136 bool ExpectIsFloatQuant8OrInt32Operator(const TfLiteContext* context,
2137                                         const TfLiteNode* node,
2138                                         OpValidationContext* val_ctx) {
2139   const auto input_type = context->tensors[node->inputs->data[0]].type;
2140   return Expect(IsFloatQuantizedOrInt32(input_type),
2141                 NNAPIValidationFailureType::kUnsupportedInputType,
2142                 "Input should be Float, Quant8, or Int32", val_ctx);
2143 }
2144 
2145 // When using NN API version 1.0 or 1.1, the condition below must be true for
2146 // quantized versions of the following ops:
2147 // * CONV_2D
2148 // * DEPTHWISE_CONV_2D
2149 // * FULLY_CONNECTED (where filter actually stands for weights)
2150 // The condition is relaxed and no longer required since version 1.2.
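// For example (hypothetical scales): with input_scale = 0.5 and
// filter_scale = 0.01, the product 0.005 must be strictly less than
// output_scale for NNAPI 1.0/1.1 to accept the quantized op.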
2151 bool ExpectIsRestrictedScalesCompliant(const TfLiteContext* context,
2152                                        const TfLiteNode* node,
2153                                        OpValidationContext* val_ctx) {
2154   const int input_id = node->inputs->data[0];
2155   const int filter_id = node->inputs->data[1];
2156   const int output_id = node->outputs->data[0];
2157   const float input_scale = context->tensors[input_id].params.scale;
2158   const float filter_scale = context->tensors[filter_id].params.scale;
2159   const float output_scale = context->tensors[output_id].params.scale;
2160   return Expect(input_scale * filter_scale < output_scale,
2161                 NNAPIValidationFailureType::kNotRestrictedScaleCompliant,
2162                 "When using NN API version 1.0 or 1.1, input_scale * "
2163                 "filter_scale must be less than output_scale.",
2164                 val_ctx);
2165 }
2166 
2167 void AppendDynamicDimensions(const TfLiteContext* context,
2168                              const TfLiteIntArray* tensor_indices,
2169                              std::vector<int>& dynamic_dimensions) {
2170   for (int i : TfLiteIntArrayView(tensor_indices)) {
2171     if (i == kTfLiteOptionalTensor) continue;
2172     const auto& tensor = context->tensors[i];
2173     if (tensor.dims_signature) {
2174       for (int i = 0; i < tensor.dims_signature->size; i++) {
2175         if (tensor.dims_signature->data[i] == -1) {
2176           dynamic_dimensions.push_back(tensor.dims->data[i]);
2177         }
2178       }
2179     }
2180   }
2181 }
2182 
2183 NNAPIExecutionCache::Signature CreateExecutionCacheSignature(
2184     const TfLiteContext* context, const TfLiteNode* node,
2185     const StatefulNnApiDelegate::Options& delegate_options,
2186     const std::vector<StatefulNnApiDelegate::MemoryRegistration>&
2187         tensor_memory_map) {
2188   // Tensor buffer handle timestamps.
2189   std::vector<uint64_t> tensor_handle_timestamps(context->tensors_size);
2190   for (int i = 0; i < tensor_handle_timestamps.size(); i++) {
2191     auto handle = context->tensors[i].buffer_handle;
2192     if (handle < 0 || handle >= tensor_memory_map.size()) {
2193       tensor_handle_timestamps[i] = kNoMemoryTimestamp;
2194     } else {
2195       tensor_handle_timestamps[i] = tensor_memory_map[handle].timestamp;
2196     }
2197   }
2198 
2199   // Dynamic dimensions.
2200   std::vector<int> dynamic_dimensions;
2201   if (delegate_options.allow_dynamic_dimensions) {
2202     AppendDynamicDimensions(context, node->inputs, dynamic_dimensions);
2203     // When using custom ops, we cannot infer output shapes, so it is not part
2204     // of the execution request.
2205     if (delegate_options.vendor_plugin == nullptr) {
2206       AppendDynamicDimensions(context, node->outputs, dynamic_dimensions);
2207     }
2208   }
2209 
2210   return NNAPIExecutionCache::Signature{std::move(tensor_handle_timestamps),
2211                                         std::move(dynamic_dimensions)};
2212 }
2213 
2214 template <typename T>
2215 std::size_t HashVector(const std::vector<T>& vec) {
2216   std::size_t seed = vec.size();
2217   auto hasher = std::hash<T>{};
2218   for (const auto& i : vec) {
2219     seed = CombineHashes({seed, hasher(i)});
2220   }
2221   return seed;
2222 }
2223 
2224 }  // namespace
2225 
2226 bool NNAPIExecutionCache::Signature::operator==(const Signature& other) const {
2227   return tensor_handle_timestamps == other.tensor_handle_timestamps &&
2228          dynamic_dimensions == other.dynamic_dimensions;
2229 }
2230 
2231 std::size_t NNAPIExecutionCache::Signature::Hasher::operator()(
2232     const Signature& signature) const {
2233   return CombineHashes({HashVector(signature.tensor_handle_timestamps),
2234                         HashVector(signature.dynamic_dimensions)});
2235 }
2236 
2237 ANeuralNetworksExecution* NNAPIExecutionCache::Get(const Signature& signature) {
2238   auto it = lookup_.find(signature);
2239 
2240   // Cache miss
2241   if (it == lookup_.end()) {
2242     return nullptr;
2243   }
2244 
2245   // Cache hit, put the entry to the front
2246   auto& list_it = it->second.first;
2247   order_.erase(list_it);
2248   order_.push_front(signature);
2249   // Update the iterator in the lookup_ map
2250   list_it = order_.begin();
2251 
2252   auto& execution = it->second.second;
2253   return execution.get();
2254 }
2255 
2256 void NNAPIExecutionCache::Put(const Signature& signature,
2257                               UniqueExecution execution) {
2258   // Release the least recently used cache if cache is full.
2259   if (order_.size() >= max_cache_size_) {
2260     ReleaseLRU();
2261   }
2262 
2263   // Register the new cache.
2264   order_.push_front(signature);
2265   lookup_.emplace(signature,
2266                   std::make_pair(order_.begin(), std::move(execution)));
2267 }
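
// Illustration of the LRU behavior (hypothetical sequence): with a maximum
// cache size of 2, after Put(A), Put(B), Get(A), Put(C) the cache holds A and
// C; B was the least recently used entry and is released when C is put.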
2268 
2269 void NNAPIExecutionCache::Clear() {
2270   order_.clear();
2271   lookup_.clear();
2272 }
2273 
2274 void NNAPIExecutionCache::SetMaxCacheSize(uint32_t max_cache_size) {
2275   max_cache_size_ = max_cache_size;
2276   while (order_.size() > max_cache_size_) {
2277     ReleaseLRU();
2278   }
2279 }
2280 
2281 void NNAPIExecutionCache::ReleaseLRU() {
2282   lookup_.erase(order_.back());
2283   order_.pop_back();
2284 }
2285 
2286 // Returns true if the given node can be delegated to NNAPI for the given
2287 // Android SDK version and target accelerator, false otherwise. Validation
2288 // failures are appended to `map_failures` in verbose-validation builds.
2289 bool NNAPIDelegateKernel::Validate(
2290     const TfLiteContext* context, const TfLiteRegistration* registration,
2291     int android_sdk_version, const TfLiteNode* node,
2292     bool is_accelerator_specified, NnapiDelegateVendorPlugin* vendor_plugin,
2293     std::vector<NNAPIValidationFailure>* map_failures) {
2294   OpValidationContext val_ctx{true, map_failures};
2295   if (vendor_plugin) {
2296     if (vendor_plugin->ValidateNode(context, registration, node)) {
2297       return true;
2298     }
2299   }
2300   auto builtin_code = registration->builtin_code;
2301   auto version = registration->version;
2302   switch (builtin_code) {
2303     case kTfLiteBuiltinAdd: {
2304       ExpectMaxOpVersion(version, 2, &val_ctx);
2305       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2306         ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
2307         if (IsInt32(context->tensors[node->inputs->data[0]].type)) {
2308           Expect(reinterpret_cast<TfLiteAddParams*>(node->builtin_data)
2309                          ->activation == kTfLiteActNone,
2310                  NNAPIValidationFailureType::kNoActivationExpected,
2311                  "No activation function supported", &val_ctx);
2312         }
2313       } else {
2314         ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2315       }
2316     } break;
2317     case kTfLiteBuiltinArgMax:
2318     case kTfLiteBuiltinArgMin: {
2319       ExpectMaxOpVersion(version, 2, &val_ctx);
2320       // Those operators were introduced in NNAPI 1.2.
2321       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2322                                  &val_ctx);
2323       const TfLiteType input_type =
2324           context->tensors[node->inputs->data[(0)]].type;
2325       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32,
2326                            kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
2327 
2328       const auto& axis_tensor = context->tensors[node->inputs->data[1]];
2329       if (axis_tensor.type == kTfLiteInt64) {
2330         Expect(
2331             axis_tensor.allocation_type == kTfLiteMmapRo &&
2332                 *axis_tensor.data.i64 <= std::numeric_limits<int32_t>::max() &&
2333                 *axis_tensor.data.i64 >= std::numeric_limits<int32_t>::min(),
2334             NNAPIValidationFailureType::kUnsupportedInputType,
2335             "NNAPI only supports axis as int32. If the axis type is int64 and "
2336             "constant, we can convert it to int32 if the value isn't too "
2337             "large.",
2338             &val_ctx);
2339       } else {
2340         Expect(axis_tensor.type == kTfLiteInt32,
2341                NNAPIValidationFailureType::kUnsupportedInputType,
2342                "Axis should be Int32", &val_ctx);
2343       }
2344       if (builtin_code == kTfLiteBuiltinArgMax) {
2345         auto builtin =
2346             reinterpret_cast<TfLiteArgMaxParams*>(node->builtin_data);
2347         Expect(builtin->output_type == kTfLiteInt32,
2348                NNAPIValidationFailureType::kUnsupportedOutputType,
2349                "NNAPI only supports int32 output.", &val_ctx);
2350       } else {
2351         auto builtin =
2352             reinterpret_cast<TfLiteArgMinParams*>(node->builtin_data);
2353         Expect(builtin->output_type == kTfLiteInt32,
2354                NNAPIValidationFailureType::kUnsupportedOutputType,
2355                "NNAPI only supports int32 output.", &val_ctx);
2356       }
2357     } break;
2358     case kTfLiteBuiltinMul: {
2359       if (is_accelerator_specified) {
2360         ExpectMaxOpVersion(version, 3, &val_ctx);
2361       } else {
2362         ExpectMaxOpVersion(version, 2, &val_ctx);
2363       }
2364       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2365         ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
2366         if (IsInt32(context->tensors[node->inputs->data[0]].type)) {
2367           Expect(reinterpret_cast<TfLiteMulParams*>(node->builtin_data)
2368                          ->activation == kTfLiteActNone,
2369                  NNAPIValidationFailureType::kNoActivationExpected,
2370                  "Fused activation is not supported for int32 input", &val_ctx);
2371         }
2372       } else {
2373         ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2374       }
2375     } break;
2376     case kTfLiteBuiltinAveragePool2d: {
2377       ExpectMaxOpVersion(version, 2, &val_ctx);
2378       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2379       auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
2380       // TODO(b/138756912): Large filter window would overflow on the
2381       // quantized reference CPU path.
2382       if (IsQuantized(context->tensors[node->inputs->data[0]].type)) {
2383         Expect(is_accelerator_specified ||
2384                    (builtin->filter_width * builtin->filter_height <= 256),
2385                NNAPIValidationFailureType::kUnsupportedOperandSize,
2386                "Large filter window would overflow on the reference CPU path",
2387                &val_ctx);
2388       }
2389     } break;
2390     case kTfLiteBuiltinMaxPool2d: {
2391       ExpectMaxOpVersion(version, 2, &val_ctx);
2392       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2393     } break;
2394     case kTfLiteBuiltinL2Pool2d: {
2395       ExpectOpVersion(version, 1, &val_ctx);
2396       ExpectIsFloatOperator(context, node, &val_ctx);
2397 
2398       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2399         auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
2400         Expect(builtin->activation == kTfLiteActNone,
2401                NNAPIValidationFailureType::kUnsupportedOperandValue,
2402                "Before NNAPI 1.2 fused activation for l2_pool may not be "
2403                "supported.",
2404                &val_ctx);
2405       }
2406     } break;
2407     case kTfLiteBuiltinConv2d: {
2408       ExpectMaxOpVersion(version, 5, &val_ctx);
2409       const auto& input_tensor = context->tensors[node->inputs->data[0]];
2410       const auto& filter_tensor = context->tensors[node->inputs->data[1]];
2411       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2412         Expect(!IsHybridOperator(context, builtin_code, node),
2413                NNAPIValidationFailureType::kUnsupportedHybridOperator,
2414                "Hybrid operators not supported before NNAPI 1.2", &val_ctx);
2415         ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
2416 
2417         if (filter_tensor.quantization.type == kTfLiteAffineQuantization) {
2418           TfLiteAffineQuantization* quantization_params =
2419               static_cast<TfLiteAffineQuantization*>(
2420                   filter_tensor.quantization.params);
2421           Expect(quantization_params->scale->size <= 1,
2422                  NNAPIValidationFailureType::kUnsupportedQuantizationType,
2423                  "Per-channel quantized convolution not supported before NNAPI "
2424                  "1.2.",
2425                  &val_ctx);
2426         }
2427       }
2428       const auto input_type = input_tensor.type;
2429       if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
2430           input_type == kTfLiteUInt8) {
2431         ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
2432       }
2433       auto builtin = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
2434       // TODO(b/132950584): Add support for Conv2D with omitted bias.
2435       Expect(node->inputs->size == 3,
2436              NNAPIValidationFailureType::kMissingRequiredOperand,
2437              "Conv2D with omitted bias not supported", &val_ctx);
2438       if (builtin->dilation_width_factor != 1 ||
2439           builtin->dilation_height_factor != 1) {
2440         Expect(android_sdk_version >= kMinSdkVersionForNNAPI12,
2441                NNAPIValidationFailureType::kUnsupportedOperandValue,
2442                "NNAPI supports dilated Conv2D since NNAPI 1.2.", &val_ctx);
2443       }
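      // dims->data[3] is the channel count of the NHWC input and the
      // input-channel count of the OHWI filter; requiring them to match rules
      // out grouped convolution, which NNAPI only supports from 1.2.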
2444       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2445         Expect(input_tensor.dims->data[3] == filter_tensor.dims->data[3],
2446                NNAPIValidationFailureType::kUnsupportedOperandValue,
2447                "Grouped convolution not supported before NNAPI 1.2",
2448                &val_ctx);
2449       }
2450     } break;
2451     case kTfLiteBuiltinDepthwiseConv2d: {
2452       ExpectMaxOpVersion(version, 3, &val_ctx);
2453 
2454       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2455         ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
2456 
2457         const auto input_type = context->tensors[node->inputs->data[0]].type;
2458         if (input_type == kTfLiteUInt8) {
2459           ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
2460         }
2461 
2462         auto builtin =
2463             reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
2464         Expect(builtin->dilation_width_factor == 1 &&
2465                    builtin->dilation_height_factor == 1,
2466                NNAPIValidationFailureType::kUnsupportedOperandValue,
2467                "dilation_width_factor and dilation_height_factor expected to "
2468                "be equal to 1",
2469                &val_ctx);
2470       }
2471     } break;
2472     case kTfLiteBuiltinFullyConnected: {
2473       ExpectMaxOpVersion(version, 5, &val_ctx);
2474       const auto output_type = context->tensors[node->outputs->data[0]].type;
2475       Expect(output_type != kTfLiteInt16,
2476              NNAPIValidationFailureType::kUnsupportedOutputType,
2477              "Unsupported output of type kTfLiteInt16", &val_ctx);
2478       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2479         Expect(!IsHybridOperator(context, builtin_code, node),
2480                NNAPIValidationFailureType::kUnsupportedHybridOperator,
2481                "Hybrid operators not supported before NNAPI 1.2", &val_ctx);
2482         ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
2483       }
2484       const auto input_type = context->tensors[node->inputs->data[0]].type;
2485       if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
2486           input_type == kTfLiteUInt8) {
2487         ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
2488       }
2489       auto builtin =
2490           reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data);
2491       if (builtin->keep_num_dims) {
2492         ExpectMinAndroidSdkVersion(android_sdk_version,
2493                                    kMinSdkVersionForNNAPI13, &val_ctx);
2494       }
2495     } break;
2496     case kTfLiteBuiltinHardSwish: {
2497       // HardSwish is supported; on pre-Q devices it is decomposed into
2498       // basic ops. Note that for some NNAPI accelerators the optimized
2499       // TFLite kernels may still be faster.
2500       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2501     } break;
2502     case kTfLiteBuiltinSoftmax: {
2503       ExpectOpVersion(version, 2, &val_ctx);
2504       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2505       const auto& output = context->tensors[node->outputs->data[0]];
2506       ExpectTypeIn(output.type, {kTfLiteFloat32, kTfLiteUInt8, kTfLiteInt8},
2507                    NNAPIValidationFailureType::kUnsupportedOutputType,
2508                    "Output type should be one of kTfLiteFloat32, kTfLiteUInt8, "
2509                    "kTfLiteInt8.",
2510                    &val_ctx);
2511       const auto& input = context->tensors[node->inputs->data[0]];
2512       const int input_rank = input.dims->size;
2513       Expect(input_rank <= 4,
2514              NNAPIValidationFailureType::kUnsupportedOperandRank,
2515              "Input rank should be <= 4", &val_ctx);
2516       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2517         Expect(
2518             input_rank == 2 || input_rank == 4,
2519             NNAPIValidationFailureType::kUnsupportedOperandRank,
2520             "Before API level 29 only 2D and 4D input tensors were supported.",
2521             &val_ctx);
2522       }
2523     } break;
2524     case kTfLiteBuiltinReshape: {
2525       ExpectOpVersion(version, 1, &val_ctx);
2526       if (android_sdk_version < kNNAPIRuntimeFeatureLevel6) {
2527         ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2528       } else {
2529         ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
2530       }
2531       const auto& input = context->tensors[node->inputs->data[0]];
2532       Expect(input.dims->size <= 4,
2533              NNAPIValidationFailureType::kUnsupportedOperandRank,
2534              "Input rank should be <= 4", &val_ctx);
2535       const auto& output = context->tensors[node->outputs->data[0]];
2536       Expect(output.dims->size <= 4,
2537              NNAPIValidationFailureType::kUnsupportedOperandRank,
2538              "Output rank should be <= 4", &val_ctx);
2539       if (node->inputs->size >= 2) {
2540         Expect(context->tensors[node->inputs->data[1]].allocation_type ==
2541                    kTfLiteMmapRo,
2542                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2543                "The shape input tensor must be constant.", &val_ctx);
2544       }
2545       if (node->inputs->size == 1) {
2546         // reject scalar reshaping
2547         auto* params =
2548             reinterpret_cast<TfLiteReshapeParams*>(node->builtin_data);
2549         int num_dimensions = params->num_dimensions;
2550         if (num_dimensions == 1 && params->shape[0] == 0) {
2551           // Legacy tflite models use a shape parameter of [0] to indicate
2552           // scalars.
2553           num_dimensions = 0;
2554         }
2555         Expect(num_dimensions > 0,
2556                NNAPIValidationFailureType::kUnsupportedOperandRank,
2557                "New shape rank should be > 0", &val_ctx);
2558       }
2559     } break;
2560     case kTfLiteBuiltinResizeBilinear: {
2561       ExpectMaxOpVersion(version, 3, &val_ctx);
2562       const auto& input = context->tensors[node->inputs->data[0]];
2563       const auto output_dims = context->tensors[node->outputs->data[0]].dims;
2564       Expect(input.dims->size == 4,
2565              NNAPIValidationFailureType::kUnsupportedOperandRank,
2566              "Input should have rank 4", &val_ctx);
2567       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2568       Expect(node->inputs->size >= 2,
2569              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2570              "Expected at least 2 inputs", &val_ctx);
2571       if (node->inputs->size >= 2) {
2572         Expect(context->tensors[node->inputs->data[1]].allocation_type ==
2573                    kTfLiteMmapRo,
2574                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2575                "The size input tensor must be constant.", &val_ctx);
2576       }
2577       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2578         Expect(output_dims->data[1] == output_dims->data[2],
2579                NNAPIValidationFailureType::kUnsupportedOperandValue,
2580                "Require width == height due to driver differences in NNAPI "
2581                "< 1.2",
2582                &val_ctx);
2583       }
2584       auto builtin =
2585           reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
2586       if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
2587         Expect(!builtin->align_corners,
2588                NNAPIValidationFailureType::kUnsupportedOperandValue,
2589                "NNAPI does not support align_corners == true.", &val_ctx);
2590         Expect(!builtin->half_pixel_centers,
2591                NNAPIValidationFailureType::kUnsupportedOperandValue,
2592                "NNAPI does not support half_pixel_centers == true.", &val_ctx);
2593       }
2594       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2595         Expect(input.type == kTfLiteFloat32,
2596                NNAPIValidationFailureType::kUnsupportedInputType,
2597                "NNAPI 1.0 & 1.1 only support float input.", &val_ctx);
2598       }
2599     } break;
2600     case kTfLiteBuiltinResizeNearestNeighbor: {
2601       ExpectMaxOpVersion(version, 3, &val_ctx);
2602       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2603                                  &val_ctx);
2604       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2605       Expect(node->inputs->size >= 2,
2606              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2607              "Expected at least 2 inputs", &val_ctx);
2608       if (node->inputs->size >= 2) {
2609         Expect(context->tensors[node->inputs->data[1]].allocation_type ==
2610                    kTfLiteMmapRo,
2611                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2612                "The size input tensor must be constant.", &val_ctx);
2613       }
2614       auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
2615           node->builtin_data);
2616       if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
2617         Expect(!builtin->align_corners,
2618                NNAPIValidationFailureType::kUnsupportedOperandValue,
2619                "NNAPI does not support align_corners == true.", &val_ctx);
2620         Expect(!builtin->half_pixel_centers,
2621                NNAPIValidationFailureType::kUnsupportedOperandValue,
2622                "NNAPI does not support half_pixel_centers == true.", &val_ctx);
2623       }
2624     } break;
2625     case kTfLiteBuiltinSqueeze: {
2626       ExpectOpVersion(version, 1, &val_ctx);
2627       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2628                                  &val_ctx);
2629       auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data);
2630       if (android_sdk_version == kMinSdkVersionForNNAPI11) {
2631         Expect(builtin->num_squeeze_dims != 0,
2632                NNAPIValidationFailureType::kUnsupportedOperandValue,
2633                "NNAPI 1.1 does not support null squeeze_dims properly.",
2634                &val_ctx);
2635       }
2636     } break;
2637     case kTfLiteBuiltinUnidirectionalSequenceLstm: {
2638       ExpectMaxOpVersion(version, 2, &val_ctx);
2639       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2640                                  &val_ctx);
2641 
2642       Expect(!IsHybridOperator(context, builtin_code, node),
2643              NNAPIValidationFailureType::kUnsupportedHybridOperator,
2644              "Hybrid version of this op is not supported by NN API.", &val_ctx);
2645 
2646       Expect(node->inputs->size == 20 || node->inputs->size == 24,
2647              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2648              "Only the 20-input and 24-input variants are supported", &val_ctx);
2649     } break;
2650     case kTfLiteBuiltinL2Normalization: {
2651       ExpectMaxOpVersion(version, 2, &val_ctx);
2652 
2653       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2654         ExpectIsFloatOperator(context, node, &val_ctx);
2655 
2656         const auto& input = context->tensors[node->inputs->data[0]];
2657         Expect(input.dims->size == 4,
2658                NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2659                "Expected input with rank 4", &val_ctx);
2660       }
2661       auto builtin = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
2662       Expect(builtin->activation == kTfLiteActNone,
2663              NNAPIValidationFailureType::kNoActivationExpected,
2664              "Expected no activation", &val_ctx);
2665     } break;
2666     case kTfLiteBuiltinLocalResponseNormalization: {
2667       ExpectOpVersion(version, 1, &val_ctx);
2668     } break;
2669     case kTfLiteBuiltinLshProjection: {
2670       ExpectOpVersion(version, 1, &val_ctx);
2671 
2672       if (reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data)
2673               ->type == kTfLiteLshProjectionSparse) {
2674         // NNAPI does not support sparse projection correctly pre-Q
2675         // (b/111751836).
2676         Expect(android_sdk_version >= kMinSdkVersionForNNAPI12,
2677                NNAPIValidationFailureType::kUnsupportedInputType,
2678                "NNAPI does not support sparse projection correctly pre-Q",
2679                &val_ctx);
2680         Expect(node->inputs->size == 2,
2681                NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2682                "NNAPI does not support weights for sparse projections.",
2683                &val_ctx);
2684       }
2685     } break;
2686     case kTfLiteBuiltinConcatenation: {
2687       ExpectMaxOpVersion(version, 2, &val_ctx);
2688       Expect(reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data)
2689                      ->activation == kTfLiteActNone,
2690              NNAPIValidationFailureType::kNoActivationExpected,
2691              "Fused activation is not supported", &val_ctx);
2692       Expect(context->tensors[node->inputs->data[0]].dims->size <= 4,
2693              NNAPIValidationFailureType::kUnsupportedOperandRank,
2694              "Input rank should be <= 4", &val_ctx);
2695 
2696       const auto& input_type = context->tensors[node->inputs->data[0]].type;
2697       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32,
2698                            kTfLiteUInt8, kTfLiteInt8);
2699 
2700       if (input_type == kTfLiteUInt8 &&
2701           android_sdk_version < kMinSdkVersionForNNAPI12) {
2702         auto first_param = context->tensors[node->inputs->data[0]].params;
2703         for (int i = 1; i < node->inputs->size; i++) {
2704           auto curr_param = context->tensors[node->inputs->data[i]].params;
2705           if (!Expect(curr_param.scale == first_param.scale &&
2706                           curr_param.zero_point == first_param.zero_point,
2707                       NNAPIValidationFailureType::kUnsupportedOperandValue,
2708                       "NNAPI 1.0 and 1.1 only support concatenating "
2709                       "quantized tensors with the same scale and zero point.",
2710                       &val_ctx)) {
2711             break;
2712           }
2713         }
2714       }
2715     } break;
2716     case kTfLiteBuiltinDequantize: {
2717       // Allow dequantizing fp16->fp32.
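      // (The check below only accepts a non-constant fp16 input; constant fp16
      // tensors presumably go through the regular constant-conversion path
      // rather than being dequantized at runtime.)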
2718       if (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
2719           context->tensors[node->inputs->data[0]].type == kTfLiteFloat16 &&
2720           context->tensors[node->inputs->data[0]].allocation_type !=
2721               kTfLiteMmapRo) {
2722         return true;
2723       }
2724       Expect(version == 1 || version == 2,
2725              NNAPIValidationFailureType::kUnsupportedOperatorVersion,
2726              "Supported op versions are 1 and 2 only", &val_ctx);
2727 
2728       const auto& input = context->tensors[node->inputs->data[0]];
2729       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2730         EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8);
2731       } else {
2732         EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8, kTfLiteInt8);
2733 
2734         if (android_sdk_version == kMinSdkVersionForNNAPI12 &&
2735             input.type == kTfLiteInt8) {
2736           const auto zero_point = input.params.zero_point;
2737           Expect(zero_point == 0,
2738                  NNAPIValidationFailureType::kUnsupportedInputType,
2739                  "NN API supports int8 type since version 1.2 but only for "
2740                  "symmetric quantization.",
2741                  &val_ctx);
2742         }
2743       }
2744     } break;
2745     case kTfLiteBuiltinDensify: {
2746       // Allow densifying sparse weights.
2747       if (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
2748           context->tensors[node->inputs->data[0]].allocation_type ==
2749               kTfLiteMmapRo) {
2750         return true;
2751       }
2752       return false;
2753     } break;
2754     case kTfLiteBuiltinFloor: {
2755       ExpectOpVersion(version, 1, &val_ctx);
2756     } break;
2757     case kTfLiteBuiltinRelu:
2758     case kTfLiteBuiltinReluN1To1:
2759     case kTfLiteBuiltinRelu6:
2760     case kTfLiteBuiltinLogistic: {
2761       ExpectMaxOpVersion(version, 2, &val_ctx);
2762       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2763     } break;
2764     case kTfLiteBuiltinTanh: {
2765       ExpectMaxOpVersion(version, 2, &val_ctx);
2766       const TfLiteType input_type =
2767           context->tensors[node->inputs->data[0]].type;
2768       Expect(IsFloat(input_type) ||
2769                  (IsQuantized(input_type) &&
2770                   android_sdk_version >= kMinSdkVersionForNNAPI12),
2771              NNAPIValidationFailureType::kUnsupportedInputType,
2772              "NNAPI only supports float tanh (quantized requires NNAPI 1.2).", &val_ctx);
2773     } break;
2774     case kTfLiteBuiltinSub: {
2775       ExpectMaxOpVersion(version, 3, &val_ctx);
2776       const TfLiteType input_type =
2777           context->tensors[node->inputs->data[0]].type;
2778       Expect((android_sdk_version >= kMinSdkVersionForNNAPI11 &&
2779               IsFloat(input_type)) ||
2780                  (android_sdk_version >= kMinSdkVersionForNNAPI12 &&
2781                   IsQuantized(input_type)) ||
2782                  (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
2783                   IsInt32(input_type)),
2784              NNAPIValidationFailureType::kUnsupportedInputType,
2785              "NNAPI supports float sub since 1.1, quantized since 1.2 and int32 since 1.3.", &val_ctx);
2786       if (IsInt32(input_type)) {
2787         Expect(reinterpret_cast<TfLiteSubParams*>(node->builtin_data)
2788                        ->activation == kTfLiteActNone,
2789                NNAPIValidationFailureType::kNoActivationExpected,
2790                "Fused activation is not supported for int32 input", &val_ctx);
2791       }
2792       const int input0_rank =
2793           context->tensors[node->inputs->data[0]].dims->size;
2794       const int input1_rank =
2795           context->tensors[node->inputs->data[1]].dims->size;
2796       Expect(input0_rank <= 4 && input1_rank <= 4,
2797              NNAPIValidationFailureType::kUnsupportedOperandRank,
2798              "Input rank must be <= 4", &val_ctx);
2799     } break;
2800     case kTfLiteBuiltinDiv: {
2801       ExpectOpVersion(version, 1, &val_ctx);
2802       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2803                                  &val_ctx);
2804       Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2805              NNAPIValidationFailureType::kUnsupportedInputType,
2806              "NNAPI only supports float div.", &val_ctx);
2807     } break;
2808     case kTfLiteBuiltinPad:
2809     case kTfLiteBuiltinPadv2: {
2810       ExpectMaxOpVersion(version, 2, &val_ctx);
2811       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2812       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2813                                  &val_ctx);
2814 
2815       const TfLiteIntArrayView input_shape(
2816           context->tensors[node->inputs->data[0]].dims);
2817       Expect(!HasZeroes(input_shape),
2818              NNAPIValidationFailureType::kUnsupportedOperandValue,
2819              "NN API pad ops do not support input tensors with no elements",
2820              &val_ctx);
2821 
2822       Expect(node->inputs->size >= 2,
2823              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2824              "Expecting at least 2 inputs", &val_ctx);
2825 
2826       if (node->inputs->size == 3) {
2827         // This is going to be mapped with a PadV2
2828         Expect(
2829             android_sdk_version >= kMinSdkVersionForNNAPI12,
2830             NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2831             "Specification of the padding value is supported from NNAPI 1.2.",
2832             &val_ctx);
2833       } else {  // this is going to be mapped as Pad
2834         if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2835           Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2836                  NNAPIValidationFailureType::kUnsupportedInputType,
2837                  "Only Float32 inputs are supported before NNAPI 1.2",
2838                  &val_ctx);
2839         }
2840       }
2841     } break;
2842     case kTfLiteBuiltinUnidirectionalSequenceRnn: {
2843       ExpectOpVersion(version, 1, &val_ctx);
2844       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2845                                  &val_ctx);
2846       Expect(!IsHybridOperator(context, builtin_code, node),
2847              NNAPIValidationFailureType::kUnsupportedHybridOperator,
2848              "Hybrid version of this op is not supported by NN API.", &val_ctx);
2849     } break;
2850     case kTfLiteBuiltinSpaceToBatchNd: {
2851       ExpectMaxOpVersion(version, 2, &val_ctx);
2852       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2853                                  &val_ctx);
2854     } break;
2855     case kTfLiteBuiltinBatchToSpaceNd: {
2856       ExpectMaxOpVersion(version, 2, &val_ctx);
2857       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2858                                  &val_ctx);
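      // crops.bytes == 16 means a 2x2 int32 crops tensor; only the all-zero
      // (no-crop) case is delegated, since NNAPI's BATCH_TO_SPACE_ND does not
      // take a crops operand.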
2859       auto crops = context->tensors[node->inputs->data[2]];
2860       auto crops_data = crops.data.i32;
2861       Expect(crops_data && crops.bytes == 16 && crops_data[0] == 0 &&
2862                  crops_data[1] == 0 && crops_data[2] == 0 && crops_data[3] == 0,
2863              NNAPIValidationFailureType::kUnsupportedOperandValue,
2864              "All crops should be 0.", &val_ctx);
2865     } break;
2866     case kTfLiteBuiltinStridedSlice: {
2867       ExpectMaxOpVersion(version, 2, &val_ctx);
2868       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2869       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2870                                  &val_ctx);
2871     } break;
2872     case kTfLiteBuiltinTranspose: {
2873       ExpectMaxOpVersion(version, 2, &val_ctx);
2874       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2875                                  &val_ctx);
2876       // Note that the permutation input tensor value dictates the output
2877       // dimensions.
2878       // TODO(b/110888333): Support dynamically-sized tensors in delegates.
2879       Expect((node->inputs->size > 1) &&
2880                  (context->tensors[node->inputs->data[1]].allocation_type ==
2881                   kTfLiteMmapRo),
2882              NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2883              "Dynamically-sized tensors not supported.", &val_ctx);
2884     } break;
2885     case kTfLiteBuiltinAbs:
2886     case kTfLiteBuiltinExp:
2887     case kTfLiteBuiltinLog:
2888     case kTfLiteBuiltinPow: {
2889       ExpectOpVersion(version, 1, &val_ctx);
2890       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2891                                  &val_ctx);
2892       ExpectIsFloatOperator(context, node, &val_ctx);
2893     } break;
2894     case kTfLiteBuiltinRsqrt: {
2895       ExpectOpVersion(version, 2, &val_ctx);
2896       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2897                                  &val_ctx);
2898       if (android_sdk_version < kNNAPIRuntimeFeatureLevel7) {
2899         ExpectIsFloatOperator(context, node, &val_ctx);
2900       } else {
2901         ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2902       }
2903     } break;
2904     case kTfLiteBuiltinSlice: {
2905       ExpectMaxOpVersion(version, 2, &val_ctx);
2906       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2907                                  &val_ctx);
2908       const auto input_type = context->tensors[node->inputs->data[0]].type;
2909       const auto begin_type = context->tensors[node->inputs->data[1]].type;
2910       const auto size_type = context->tensors[node->inputs->data[2]].type;
2911       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2912                            kTfLiteUInt8, kTfLiteInt8);
2913       Expect(begin_type == kTfLiteInt32,
2914              NNAPIValidationFailureType::kUnsupportedInputType,
2915              "Begin type should be Int32", &val_ctx);
2916       Expect(size_type == kTfLiteInt32,
2917              NNAPIValidationFailureType::kUnsupportedInputType,
2918              "Size type should be Int32", &val_ctx);
2919     } break;
2920     case kTfLiteBuiltinSin: {
2921       ExpectOpVersion(version, 1, &val_ctx);
2922       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2923                                  &val_ctx);
2924       ExpectIsFloatOperator(context, node, &val_ctx);
2925     } break;
2926     case kTfLiteBuiltinTransposeConv: {
2927       ExpectMaxOpVersion(version, 3, &val_ctx);
2928       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2929                                  &val_ctx);
2930       Expect((node->inputs->size > 1) &&
2931                  (context->tensors[node->inputs->data[0]].allocation_type ==
2932                   kTfLiteMmapRo) &&
2933                  (context->tensors[node->inputs->data[1]].allocation_type ==
2934                   kTfLiteMmapRo),
2935              NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2936              "Dynamically-sized tensors not supported.", &val_ctx);
2937     } break;
2938     case kTfLiteBuiltinSqrt: {
2939       ExpectOpVersion(version, 1, &val_ctx);
2940       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2941                                  &val_ctx);
2942       ExpectIsFloatOperator(context, node, &val_ctx);
2943     } break;
2944     case kTfLiteBuiltinRnn: {
2945       ExpectOpVersion(version, 1, &val_ctx);
2946       Expect(node->inputs->size == 5,
2947              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2948              "Expected 5 inputs", &val_ctx);
2949       if (node->inputs->size >= 2) {
2950         Expect(
2951             context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
2952                 kTfLiteFloat32,
2953             NNAPIValidationFailureType::kUnsupportedInputType,
2954             "NNAPI only supports float32 weights.", &val_ctx);
2955       }
2956     } break;
2957     case kTfLiteBuiltinSpaceToDepth: {
2958       ExpectMaxOpVersion(version, 2, &val_ctx);
2959       const TfLiteType input_type =
2960           context->tensors[node->inputs->data[0]].type;
2961       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2962                            kTfLiteInt8);
2963     } break;
2964     case kTfLiteBuiltinSvdf: {
2965       ExpectOpVersion(version, 1, &val_ctx);
2966       Expect(node->inputs->size == 5,
2967              NNAPIValidationFailureType::kUnsupportedOperandRank,
2968              "Expected 5 inputs", &val_ctx);
2969       if (node->inputs->size >= 2) {
2970         Expect(
2971             context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
2972                 kTfLiteFloat32,
2973             NNAPIValidationFailureType::kUnsupportedInputType,
2974             "NNAPI only supports float32 weights.", &val_ctx);
2975       }
2976       Expect(android_sdk_version >= kMinSdkVersionForNNAPI11,
2977              NNAPIValidationFailureType::kUnsupportedOperandRank,
2978              "SVDF does not support rank > 1 on NNAPI 1.0.", &val_ctx);
2979       Expect(context->tensors[node->inputs->data[/*kWeightsFeatureTensor*/ 1]]
2980                      .type == kTfLiteFloat32,
2981              NNAPIValidationFailureType::kUnsupportedInputType,
2982              "Weights should be Float32", &val_ctx);
2983     } break;
2984     case kTfLiteBuiltinLstm: {
2985       ExpectMaxOpVersion(version, 3, &val_ctx);
2986       Expect(
2987           android_sdk_version >= kMinSdkVersionForNNAPI11,
2988           NNAPIValidationFailureType::kUnsupportedAndroidVersion,
2989           "NNAPI 1.0 has a bug for optional tensors which would affect LSTM.",
2990           &val_ctx);
2991       Expect(android_sdk_version >= kMinSdkVersionForNNAPI12 ||
2992                  !IsHybridOperator(context, builtin_code, node),
2993              NNAPIValidationFailureType::kUnsupportedHybridOperator,
2994              "Hybrid operators not supported before NNAPI 1.2.", &val_ctx);
2995 
2996       const auto weight_input_index =
2997           isLstmBasicKernel(node) ? 2 /*  basic::kInputWeights */
2998                                   : 4 /* full::kInputToOutputWeightsTensor */;
2999 
3000       const TfLiteType weight_type =
3001           context->tensors[node->inputs->data[weight_input_index]].type;
3002 
3003       if (isLstmBasicKernel(node)) {
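        // The quantized "basic" LSTM kernel is only mapped when its tensors
        // carry the fixed quantization enforced below: input/output scale
        // 1/128 with zero point 128, and the expected cell-state quantization
        // (scale 16/32768, zero point 0).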
3004         Expect(weight_type == kTfLiteUInt8,
3005                NNAPIValidationFailureType::kUnsupportedInputType,
3006                "Basic LSTM Kernels support only UINT8 weights", &val_ctx);
3007 
3008         const auto input_quantization_params =
3009             context->tensors[node->inputs->data[0]].params;
3010         Expect(input_quantization_params.scale == 1. / 128. &&
3011                    input_quantization_params.zero_point == 128,
3012                NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
3013                "Invalid input quantization", &val_ctx);
3014 
3015         const auto output_quantization_params =
3016             context->tensors[node->outputs->data[0]].params;
3017         Expect(output_quantization_params.scale == 1. / 128. &&
3018                    output_quantization_params.zero_point == 128,
3019                NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
3020                "Invalid output quantization", &val_ctx);
3021 
3022         const auto cell_state_quantization_params =
3023             context->tensors[node->outputs->data[1]].params;
3024         Expect(cell_state_quantization_params.scale == 16. / 32768. ||
3025                    cell_state_quantization_params.zero_point == 0,
3026                NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
3027                "Invalid cell state quantization", &val_ctx);
3028 
3029         auto is_const_tensor = [&node, &context](int tensor_idx) {
3030           return context->tensors[node->inputs->data[tensor_idx]]
3031                      .allocation_type == kTfLiteMmapRo;
3032         };
3033 
3034         Expect(is_const_tensor(2 /* kInputWeights */),
3035                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
3036                "Weights tensor should be constant", &val_ctx);
3037         Expect(is_const_tensor(3 /* kInputBiases */),
3038                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
3039                "Biases tensor should be constant", &val_ctx);
3040 
3041         return val_ctx.is_valid;
3042       } else {
3043         if (node->inputs->size == 24) {
3044           ExpectMinAndroidSdkVersion(android_sdk_version,
3045                                      kMinSdkVersionForNNAPI12, &val_ctx);
3046         }
3047 
3048         if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
3049           Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8 ||
3050                      weight_type == kTfLiteInt8,
3051                  NNAPIValidationFailureType::kUnsupportedInputType,
3052                  "Weight has to be Float32 or UINT8 or INT8", &val_ctx);
3053         } else {
3054           Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8,
3055                  NNAPIValidationFailureType::kUnsupportedInputType,
3056                  "Weight has to be Float32 or UINT8", &val_ctx);
3057         }
3058       }
3059     } break;
3060     case kTfLiteBuiltinMean: {
3061       ExpectMaxOpVersion(version, 2, &val_ctx);
3062       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
3063                                  &val_ctx);
3064       if (android_sdk_version >= kMinSdkVersionForNNAPI12) {
3065         Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 ||
3066                    IsQuantized(context->tensors[node->inputs->data[0]].type),
3067                NNAPIValidationFailureType::kUnsupportedInputType,
3068                "Expected Float32 or Quantized input", &val_ctx);
3069       } else {
3070         Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
3071                NNAPIValidationFailureType::kUnsupportedInputType,
3072                "Expected Float32 input", &val_ctx);
3073       }
3074       Expect(context->tensors[node->outputs->data[0]].dims->size > 0,
3075              NNAPIValidationFailureType::kUnsupportedOutputType,
3076              "NNAPI does not support generating a scalar as output for MEAN.",
3077              &val_ctx);
3078     } break;
3079     case kTfLiteBuiltinEmbeddingLookup: {
3080       ExpectOpVersion(version, 1, &val_ctx);
3081       Expect(context->tensors[node->inputs->data[1]].type == kTfLiteFloat32,
3082              NNAPIValidationFailureType::kUnsupportedInputType,
3083              "NNAPI only supports float32 values.", &val_ctx);
3084     } break;
3085     case kTfLiteBuiltinHashtableLookup: {
3086       ExpectOpVersion(version, 1, &val_ctx);
3087       Expect(context->tensors[node->outputs->data[0]].type == kTfLiteFloat32,
3088              NNAPIValidationFailureType::kUnsupportedOutputType,
3089              "NNAPI only supports float32 output.", &val_ctx);
3090     } break;
3091     case kTfLiteBuiltinMaximum:
3092     case kTfLiteBuiltinMinimum: {
3093       ExpectMaxOpVersion(version, 3, &val_ctx);
3094       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3095                                  &val_ctx);
3096       const auto input_type = context->tensors[node->inputs->data[0]].type;
3097       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
3098                            kTfLiteInt8, kTfLiteInt32);
3099       const TfLiteTensor& operand0 = context->tensors[node->inputs->data[0]];
3100       if (operand0.dims->size == 0) {
3101         Expect(operand0.allocation_type == kTfLiteMmapRo,
3102                NNAPIValidationFailureType::kUnsupportedInputType,
3103                "Scalar operand should be constant", &val_ctx);
3104       }
3105       const TfLiteTensor& operand1 = context->tensors[node->inputs->data[1]];
3106       if (operand1.dims->size == 0) {
3107         Expect(operand1.allocation_type == kTfLiteMmapRo,
3108                NNAPIValidationFailureType::kUnsupportedInputType,
3109                "Scalar operand should be constant", &val_ctx);
3110       }
3111     } break;
3112     case kTfLiteBuiltinCast: {
3113       ExpectOpVersion(version, 1, &val_ctx);
3114       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3115                                  &val_ctx);
3116       const TfLiteType input_type =
3117           context->tensors[node->inputs->data[0]].type;
3118       const TfLiteType output_type =
3119           context->tensors[node->outputs->data[0]].type;
3120       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
3121         EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
3122                              kTfLiteUInt8, kTfLiteInt8);
3123 
3124         ExpectTypeIn(
3125             output_type,
3126             {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8},
3127             NNAPIValidationFailureType::kUnsupportedOutputType,
3128             "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
3129             "kTfLiteUInt8, kTfLiteInt8.",
3130             &val_ctx);
3131       } else {
3132         EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
3133                              kTfLiteUInt8);
3134 
3135         ExpectTypeIn(
3136             output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
3137             NNAPIValidationFailureType::kUnsupportedOutputType,
3138             "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
3139             "kTfLiteUInt8.",
3140             &val_ctx);
3141       }
3142     } break;
3143     case kTfLiteBuiltinLeakyRelu:
3144     case kTfLiteBuiltinPrelu: {
3145       ExpectOpVersion(version, 1, &val_ctx);
3146       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3147                                  &val_ctx);
3148       const auto input_type = context->tensors[node->inputs->data[0]].type;
3149       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
3150                            kTfLiteInt8);
3151     } break;
3152     case kTfLiteBuiltinTile: {
3153       ExpectOpVersion(version, 1, &val_ctx);
3154       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3155                                  &val_ctx);
3156       const auto input_type = context->tensors[node->inputs->data[0]].type;
3157       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt8,
3158                            kTfLiteUInt8, kTfLiteInt32);
3159       const auto multipliers_type =
3160           context->tensors[node->inputs->data[1]].type;
3161       Expect(multipliers_type == kTfLiteInt32,
3162              NNAPIValidationFailureType::kUnsupportedInputType,
3163              "Multipliers should be Int32", &val_ctx);
3164     } break;
3165     case kTfLiteBuiltinLogicalOr:
3166     case kTfLiteBuiltinLogicalAnd:
3167     case kTfLiteBuiltinLogicalNot: {
3168       ExpectOpVersion(version, 1, &val_ctx);
3169       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3170                                  &val_ctx);
3171       const auto input_type = context->tensors[node->inputs->data[0]].type;
3172       Expect(input_type == kTfLiteBool,
3173              NNAPIValidationFailureType::kUnsupportedInputType,
3174              "Input should be bool", &val_ctx);
3175     } break;
3176     case kTfLiteBuiltinLess:
3177     case kTfLiteBuiltinLessEqual:
3178     case kTfLiteBuiltinGreater:
3179     case kTfLiteBuiltinGreaterEqual:
3180     case kTfLiteBuiltinEqual:
3181     case kTfLiteBuiltinNotEqual: {
3182       ExpectMaxOpVersion(version, 2, &val_ctx);
3183       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3184                                  &val_ctx);
3185       const auto input_type = context->tensors[node->inputs->data[0]].type;
3186       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
3187                            kTfLiteInt8, kTfLiteBool, kTfLiteInt32);
3188     } break;
3189     case kTfLiteBuiltinNeg: {
3190       ExpectMaxOpVersion(version, 2, &val_ctx);
3191       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3192                                  &val_ctx);
3193       const auto input_type = context->tensors[node->inputs->data[0]].type;
3194       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32);
3195     } break;
3196     case kTfLiteBuiltinTopkV2: {
3197       ExpectMaxOpVersion(version, 2, &val_ctx);
3198       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3199                                  &val_ctx);
3200       const auto& input_type = context->tensors[node->inputs->data[0]].type;
3201       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
3202                            kTfLiteUInt8, kTfLiteInt8);
3203       const auto& k_param = context->tensors[node->inputs->data[1]];
3204       Expect(k_param.type == kTfLiteInt32 &&
3205                  k_param.allocation_type == kTfLiteMmapRo,
3206              NNAPIValidationFailureType::kUnsupportedInputType,
3207              "K param should be a constant of type Int32", &val_ctx);
3208     } break;
3209     case kTfLiteBuiltinSelect: {
3210       ExpectMaxOpVersion(version, 2, &val_ctx);
3211       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3212                                  &val_ctx);
3213       const auto value_type = context->tensors[node->inputs->data[1]].type;
3214       EXPECT_INPUT_TYPE_IN(value_type, kTfLiteFloat32, kTfLiteInt32,
3215                            kTfLiteUInt8, kTfLiteInt8);
3216       TfLiteIntArray* condition_shape =
3217           context->tensors[node->inputs->data[0]].dims;
3218       TfLiteIntArray* input_shape =
3219           context->tensors[node->inputs->data[1]].dims;
3220       Expect(TfLiteIntArrayEqual(condition_shape, input_shape),
3221              NNAPIValidationFailureType::kUnsupportedOperandValue,
3222              "Condition and inputs tensors should have the same shape",
3223              &val_ctx);
3224     } break;
3225     case kTfLiteBuiltinGather: {
3226       ExpectOpVersion(version, 2, &val_ctx);
3227       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3228                                  &val_ctx);
3229       const auto input_type = context->tensors[node->inputs->data[0]].type;
3230       const auto& positions = context->tensors[node->inputs->data[1]];
3231 
3232       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
3233                            kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
3234 
3235       Expect(positions.type == kTfLiteInt32,
3236              NNAPIValidationFailureType::kUnsupportedInputType,
3237              "Positions type should be kTfLiteInt32", &val_ctx);
3238       Expect(positions.dims->size != 0,
3239              NNAPIValidationFailureType::kUnsupportedOperandRank,
3240              "0-dimension args are not supported by NNAPI.", &val_ctx);
3241     } break;
3242     case kTfLiteBuiltinBidirectionalSequenceLstm: {
3243       ExpectOpVersion(version, 1, &val_ctx);
3244       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3245                                  &val_ctx);
3246       Expect(!IsHybridOperator(context, builtin_code, node),
3247              NNAPIValidationFailureType::kUnsupportedHybridOperator,
3248              "Hybrid version of this op is not supported by NN API.", &val_ctx);
3249     } break;
3250     case kTfLiteBuiltinExpandDims: {
3251       ExpectOpVersion(version, 1, &val_ctx);
3252       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3253                                  &val_ctx);
3254       const auto input_type = context->tensors[node->inputs->data[0]].type;
3255       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
3256                            kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
3257       const auto axis = context->tensors[node->inputs->data[1]];
3258       Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
3259              NNAPIValidationFailureType::kUnsupportedInputType,
3260              "NNAPI only supports constant int32 axis tensor.", &val_ctx);
3261     } break;
3262     case kTfLiteBuiltinSplit: {
3263       ExpectOpVersion(version, 3, &val_ctx);
3264       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3265                                  &val_ctx);
3266       // Tensor indices: split_dim: 0, value: 1
3267       const TfLiteTensor& input = context->tensors[node->inputs->data[1]];
3268       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
3269         EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
3270                              kTfLiteInt8, kTfLiteInt32);
3271       } else {
3272         EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
3273                              kTfLiteInt32);
3274       }
3275       const TfLiteTensor& axis = context->tensors[node->inputs->data[0]];
3276       Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
3277              NNAPIValidationFailureType::kUnsupportedInputType,
3278              "NNAPI only supports constant int32 axis tensor.", &val_ctx);
3279     } break;
3280     case kTfLiteBuiltinSplitV: {
3281       ExpectOpVersion(version, 2, &val_ctx);
3282       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
3283                                  &val_ctx);
3284       // Tensor indices: value: 0, size_splits: 1, axis: 2
3285       const TfLiteTensor& input = context->tensors[node->inputs->data[0]];
3286       const TfLiteTensor& size_splits = context->tensors[node->inputs->data[1]];
3287       const TfLiteTensor& axis = context->tensors[node->inputs->data[2]];
3288       EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
3289                            kTfLiteInt8, kTfLiteInt32);
3290       bool size_splits_is_int32_const_vector =
3291           size_splits.type == kTfLiteInt32 && size_splits.dims->size == 1 &&
3292           size_splits.allocation_type == kTfLiteMmapRo;
3293       bool axis_is_int32_const =
3294           axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo;
3295       Expect(size_splits_is_int32_const_vector,
3296              NNAPIValidationFailureType::kUnsupportedInputType,
3297              "NNAPI only supports constant int32 size_splits vector.",
3298              &val_ctx);
3299       Expect(axis_is_int32_const,
3300              NNAPIValidationFailureType::kUnsupportedInputType,
3301              "NNAPI only supports constant int32 axis tensor.", &val_ctx);
3302       if (size_splits_is_int32_const_vector && axis_is_int32_const) {
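        // A size_splits entry may be -1 ("infer this split from what remains");
        // ComputeSplitVUnknownSplitSize presumably resolves that placeholder,
        // and a resolved size of 0 would again be an unsupported zero-sized
        // split.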
3303         Expect(std::all_of(size_splits.data.i32,
3304                            size_splits.data.i32 + size_splits.dims->data[0],
3305                            [](auto size) { return size != 0; }),
3306                NNAPIValidationFailureType::kUnsupportedInputType,
3307                "NNAPI only supports non-zero split sizes.", &val_ctx);
3308         Expect(ComputeSplitVUnknownSplitSize(context, node) != 0,
3309                NNAPIValidationFailureType::kUnsupportedInputType,
3310                "NNAPI only supports non-zero split sizes.", &val_ctx);
3311       }
3312     } break;
3313     case kTfLiteBuiltinLogSoftmax: {
3314       ExpectOpVersion(version, 1, &val_ctx);
3315       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3316                                  &val_ctx);
3317       const auto input_type = context->tensors[node->inputs->data[0]].type;
3318       Expect(input_type == kTfLiteFloat32,
3319              NNAPIValidationFailureType::kUnsupportedInputType,
3320              "Input should be Float32.", &val_ctx);
3321     } break;
3322     case kTfLiteBuiltinQuantize: {
3323       ExpectMaxOpVersion(version, 2, &val_ctx);
3324       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3325                                  &val_ctx);
3326       const auto value_type = context->tensors[node->inputs->data[0]].type;
3327       Expect(value_type == kTfLiteFloat32 || IsQuantized(value_type),
3328              NNAPIValidationFailureType::kUnsupportedInputType,
3329              "Value should be quantized or Float32.", &val_ctx);
3330       if (IsQuantized(value_type)) {
3331         const auto quantization_params =
3332             context->tensors[node->inputs->data[0]].params;
3333         Expect(quantization_params.scale > 0.f,
3334                NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
3335                "Quantization scale should be > 0.", &val_ctx);
3336       }
3337       const auto output_type = context->tensors[node->outputs->data[0]].type;
3338       if (android_sdk_version < kMinSdkVersionForNNAPI13) {
3339         Expect(output_type == kTfLiteUInt8,
3340                NNAPIValidationFailureType::kUnsupportedOutputType,
3341                "Output should be kTfLiteUInt8.", &val_ctx);
3342       } else {
3343         ExpectTypeIn(output_type, {kTfLiteUInt8, kTfLiteInt8},
3344                      NNAPIValidationFailureType::kUnsupportedOutputType,
3345                      "Output should be kTfLiteUInt8 or kTfLiteInt8.", &val_ctx);
3346       }
3347       const auto quantization_params =
3348           context->tensors[node->outputs->data[0]].params;
3349       Expect(quantization_params.scale > 0.f,
3350              NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
3351              "Quantization scale should be > 0.", &val_ctx);
3352     } break;
3353     case kTfLiteBuiltinReduceAny: {
3354       ExpectOpVersion(version, 2, &val_ctx);
3355       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3356                                  &val_ctx);
3357       Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
3358              NNAPIValidationFailureType::kUnsupportedOutputType,
3359              "NNAPI does not support generating a scalar as output.", &val_ctx);
3360     } break;
3361     case kTfLiteBuiltinReduceMin:
3362     case kTfLiteBuiltinReduceMax: {
3363       ExpectMaxOpVersion(version, 2, &val_ctx);
3364       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3365                                  &val_ctx);
3366       const auto input_tensor = context->tensors[node->inputs->data[0]];
3367       const auto input_type = input_tensor.type;
3368       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
3369                            kTfLiteInt8);
3370       Expect(input_tensor.dims->size != 0,
3371              NNAPIValidationFailureType::kUnsupportedOutputType,
3372              "NNAPI does not support generating a scalar as output.", &val_ctx);
3373     } break;
3374     case kTfLiteBuiltinDepthToSpace: {
3375       const TfLiteType input_type =
3376           context->tensors[node->inputs->data[0]].type;
3377       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
3378                            kTfLiteInt8);
3379     } break;
3380     case kTfLiteBuiltinReduceProd:
3381     case kTfLiteBuiltinSum: {
3382       ExpectOpVersion(version, 1, &val_ctx);
3383       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3384                                  &val_ctx);
3385       Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
3386              NNAPIValidationFailureType::kUnsupportedOutputType,
3387              "NNAPI does not support generating a scalar as output", &val_ctx);
3388       const auto input_type = context->tensors[node->inputs->data[0]].type;
3389       Expect(input_type == kTfLiteFloat32,
3390              NNAPIValidationFailureType::kUnsupportedInputType,
3391              "NNAPI only supports floating point input.", &val_ctx);
3392     } break;
3393     case kTfLiteBuiltinElu: {
3394       ExpectOpVersion(version, 1, &val_ctx);
3395       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
3396                                  &val_ctx);
3397       const auto input_type = context->tensors[node->inputs->data[0]].type;
3398       Expect(input_type == kTfLiteFloat32,
3399              NNAPIValidationFailureType::kUnsupportedInputType,
3400              "NNAPI only supports floating point input.", &val_ctx);
3401     } break;
3402     case kTfLiteBuiltinFill: {
3403       ExpectOpVersion(version, 1, &val_ctx);
3404       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
3405                                  &val_ctx);
3406       const auto& dims_tensor = context->tensors[node->inputs->data[0]];
3407       Expect(IsConstantTensor(&dims_tensor),
3408              NNAPIValidationFailureType::kUnsupportedInputType,
3409              "NNAPI doesn't support dynamic dimensions tensor.", &val_ctx);
3410       EXPECT_INPUT_TYPE_IN(dims_tensor.type, kTfLiteInt32, kTfLiteInt64);
3411       if (IsConstantTensor(&dims_tensor)) {
3412         Expect(dims_tensor.dims->data[0] != 0,
3413                NNAPIValidationFailureType::kUnsupportedOperandValue,
3414                "NNAPI doesn't support generating scalars from FILL.", &val_ctx);
3415         if (dims_tensor.type == kTfLiteInt64) {
3416           bool fit_in_int32 =
3417               std::all_of(dims_tensor.data.i64,
3418                           dims_tensor.data.i64 + dims_tensor.dims->data[0],
3419                           [](int64_t dim) {
3420                             return std::numeric_limits<int32_t>::min() <= dim &&
3421                                    dim <= std::numeric_limits<int32_t>::max();
3422                           });
3423           Expect(fit_in_int32,
3424                  NNAPIValidationFailureType::kUnsupportedOperandValue,
3425                  "NNAPI only supports an int32 dimensions tensor. Constant "
3426                  "int64 dimensions are accepted only when every value "
3427                  "fits in int32.",
3428                  &val_ctx);
3429         }
3430       }
3431       const auto& value_tensor = context->tensors[node->inputs->data[1]];
3432       EXPECT_INPUT_TYPE_IN(value_tensor.type, kTfLiteFloat32, kTfLiteInt32,
3433                            kTfLiteInt64);
3434       if (value_tensor.type == kTfLiteInt64 &&
3435           IsConstantTensor(&value_tensor)) {
3436         Expect(
3437             *value_tensor.data.i64 <= std::numeric_limits<int32_t>::max() &&
3438                 *value_tensor.data.i64 >= std::numeric_limits<int32_t>::min(),
3439             NNAPIValidationFailureType::kUnsupportedInputType,
3440             "NNAPI only supports int32 input. If the input is a constant "
3441             "int64, it is accepted only when the value fits in "
3442             "int32.",
3443             &val_ctx);
3444       }
3445     } break;
3446     case kTfLiteBuiltinPack: {
3447       ExpectOpVersion(version, 2, &val_ctx);
3448       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
3449                                  &val_ctx);
3450       const auto input_type = context->tensors[node->inputs->data[0]].type;
3451       if (android_sdk_version >= kNNAPIRuntimeFeatureLevel6) {
3452         EXPECT_INPUT_TYPE_IN(input_type, kTfLiteInt32, kTfLiteFloat32,
3453                              kTfLiteInt8, kTfLiteUInt8);
3454       } else {
3455         EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt8);
3456         auto builtin = reinterpret_cast<TfLitePackParams*>(node->builtin_data);
3457         Expect(builtin->axis != -1 &&
3458                    builtin->axis !=
3459                        context->tensors[node->inputs->data[0]].dims->size,
3460                NNAPIValidationFailureType::kUnsupportedOperandValue,
3461                "NNAPI does not support axis being the last dimension",
3462                &val_ctx);
3463       }
3464     } break;
3465     case kTfLiteBuiltinUnpack: {
3466       ExpectOpVersion(version, 2, &val_ctx);
3467       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
3468                                  &val_ctx);
3469       const auto input_type = context->tensors[node->inputs->data[0]].type;
3470       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
3471                            kTfLiteInt8);
3472       Expect(context->tensors[node->inputs->data[0]].dims->size > 1,
3473              NNAPIValidationFailureType::kUnsupportedOperandValue,
3474              "NNAPI does not support unpacking a rank-1 tensor", &val_ctx);
3475       Expect(context->tensors[node->inputs->data[0]].dims->size <= 4,
3476              NNAPIValidationFailureType::kUnsupportedOperandValue,
3477              "NNAPI does not support unpacking a tensor with rank > 4",
3478              &val_ctx);
3479       const auto* builtin =
3480           reinterpret_cast<const TfLiteUnpackParams*>(node->builtin_data);
3481       Expect(builtin->axis != -1 &&
3482                  builtin->axis !=
3483                      context->tensors[node->inputs->data[0]].dims->size - 1,
3484              NNAPIValidationFailureType::kUnsupportedOperandValue,
3485              "NNAPI does not support axis being the last dimension", &val_ctx);
3486     } break;
3487     case kTfLiteBuiltinSquaredDifference: {
3488       ExpectOpVersion(version, 2, &val_ctx);
3489       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
3490                                  &val_ctx);
3491       const auto input0_type = context->tensors[node->inputs->data[0]].type;
3492       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
3493         EXPECT_INPUT_TYPE_IN(input0_type, kTfLiteFloat32, kTfLiteUInt8,
3494                              kTfLiteInt8, kTfLiteInt32);
3495       } else if (android_sdk_version >= kMinSdkVersionForNNAPI12) {
3496         EXPECT_INPUT_TYPE_IN(input0_type, kTfLiteFloat32, kTfLiteUInt8);
3497       } else {
3498         EXPECT_INPUT_TYPE_IN(input0_type, kTfLiteFloat32);
3499       }
3500       const int input0_rank =
3501           context->tensors[node->inputs->data[0]].dims->size;
3502       const int input1_rank =
3503           context->tensors[node->inputs->data[1]].dims->size;
3504       Expect(input0_rank <= 4 && input1_rank <= 4,
3505              NNAPIValidationFailureType::kUnsupportedOperandRank,
3506              "NNAPI does not support input rank greater than 4", &val_ctx);
3507     } break;
3508     case kTfLiteBuiltinBatchMatmul: {
3509       ExpectOpVersion(version, 2, &val_ctx);
3510       ExpectMinAndroidSdkVersion(android_sdk_version,
3511                                  kNNAPIRuntimeFeatureLevel6, &val_ctx);
3512       const auto& input0 = context->tensors[node->inputs->data[0]];
3513       const auto& input1 = context->tensors[node->inputs->data[1]];
3514       EXPECT_INPUT_TYPE_IN(input0.type, kTfLiteFloat32, kTfLiteInt32,
3515                            kTfLiteInt8);
3516       Expect(input0.type == input1.type,
3517              NNAPIValidationFailureType::kUnsupportedHybridOperator,
3518              "NNAPI does not support hybrid batch matmul", &val_ctx);
3519       Expect(input0.dims->size <= 4 && input0.dims->size >= 2,
3520              NNAPIValidationFailureType::kUnsupportedOperandRank,
3521              "NNAPI does not support input rank greater than 4 or less than 2",
3522              &val_ctx);
3523       Expect(!IsBroadcastBatchMatMul(context, node),
3524              NNAPIValidationFailureType::kUnsupportedInputType,
3525              "NNAPI does not support broadcast batch matmul", &val_ctx);
3526     } break;
3527     case kTfLiteBuiltinMirrorPad: {
3528       ExpectMaxOpVersion(version, 2, &val_ctx);
3529       ExpectMinAndroidSdkVersion(android_sdk_version,
3530                                  kNNAPIRuntimeFeatureLevel7, &val_ctx);
3531       ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
3532 
3533       const TfLiteIntArrayView input_shape(
3534           context->tensors[node->inputs->data[0]].dims);
3535       Expect(!HasZeroes(input_shape),
3536              NNAPIValidationFailureType::kUnsupportedOperandValue,
3537              "NN API pad ops do not support input tensors with no elements",
3538              &val_ctx);
3539       Expect(node->inputs->size == 2,
3540              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
3541              "Expecting 2 inputs", &val_ctx);
3542     } break;
3543     case kTfLiteBuiltinReverseV2: {
3544       ExpectMaxOpVersion(version, 3, &val_ctx);
3545       ExpectMinAndroidSdkVersion(android_sdk_version,
3546                                  kNNAPIRuntimeFeatureLevel7, &val_ctx);
3547       ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
3548       Expect(node->inputs->size == 2,
3549              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
3550              "Expecting 2 inputs", &val_ctx);
3551     } break;
3552     default:
3553       // All other operators are not mapped.
3554       AddValidationFailure(NNAPIValidationFailureType::kUnsupportedOperator,
3555                            "Unsupported operation type.", &val_ctx);
3556   }
3557   return val_ctx.is_valid;
3558 }  // NOLINT(readability/fn_size)
3559 
3560 TfLiteStatus NNAPIDelegateKernel::Map(
3561     TfLiteContext* context, int builtin_code, int version,
3562     int android_sdk_version, const NNAPIOpMappingArgs& mapping_args,
3563     ANeuralNetworksOperationType* nn_op_type,
3564     NnapiDelegateVendorPlugin* vendor_plugin) {
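  // Helper shared by the FULLY_CONNECTED and TRANSPOSE_CONV mappings below:
  // when the TFLite node carries no bias, a zero-filled bias operand of the
  // appropriate type and shape is synthesized, since NNAPI expects one.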
3565   auto add_zero_bias = [mapping_args](int input_id, int filter_id,
3566                                       int num_elements) -> void {
3567     // NNAPI requires a bias tensor, so we allocate a new tensor to fill
3568     // it with zeroes. It is deleted with other tensors in the context
3569     // during the subgraph destructor call.
3570     int bias_index = -1;
3571     mapping_args.context->AddTensors(mapping_args.context, 1, &bias_index);
3572     TfLiteTensor* bias_tensor = &mapping_args.context->tensors[bias_index];
3573     const auto input_type = mapping_args.context->tensors[input_id].type;
3574     if (input_type == kTfLiteFloat32) {
3575       bias_tensor->type = kTfLiteFloat32;
3576     } else {
3577       bias_tensor->type = kTfLiteInt32;
3578     }
3579     // Create an array with a required bias shape and resize the bias
3580     // tensor.
3581     TfLiteIntArray* bias_shape = TfLiteIntArrayCreate(1);
3582     bias_shape->data[0] = num_elements;
3583     bias_tensor->allocation_type = kTfLiteDynamic;
3584     mapping_args.context->ResizeTensor(mapping_args.context, bias_tensor,
3585                                        bias_shape);
3586     // Set the tensor's values to zero and add it using AddVector*, so
3587     // that the values are copied to NNAPI. We don't use the AddTensor
3588     // function because it doesn't copy values and the tensor we just
3589     // created is not in node->inputs.
3590     if (input_type == kTfLiteFloat32) {
3591       memset(bias_tensor->data.f, 0, num_elements * sizeof(float));
3592       mapping_args.builder->AddVectorFloat32Operand(bias_tensor->data.f,
3593                                                     num_elements);
3594     } else {
3595       memset(bias_tensor->data.i32, 0, num_elements * sizeof(int));
3596       const TfLiteTensor& input_tensor =
3597           mapping_args.context->tensors[input_id];
3598       const TfLiteTensor& filter_tensor =
3599           mapping_args.context->tensors[filter_id];
3600       // NNAPI requires the bias scale to be the product of the input
3601       // scale and the filter scale.
3602       bias_tensor->params.scale =
3603           input_tensor.params.scale * filter_tensor.params.scale;
3604       mapping_args.builder->AddVectorInt32Operand(
3605           bias_tensor->data.i32, num_elements, bias_tensor->params.scale,
3606           /*zero_point=*/0);
3607     }
3608   };
3609   switch (builtin_code) {
3610     case kTfLiteBuiltinAdd: {
3611       auto builtin =
3612           reinterpret_cast<TfLiteAddParams*>(mapping_args.node->builtin_data);
3613       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3614       *nn_op_type = ANEURALNETWORKS_ADD;
3615     } break;
3616     case kTfLiteBuiltinArgMax: {
3617       *nn_op_type = ANEURALNETWORKS_ARGMAX;
3618     } break;
3619     case kTfLiteBuiltinArgMin: {
3620       *nn_op_type = ANEURALNETWORKS_ARGMIN;
3621     } break;
3622     case kTfLiteBuiltinMul: {
3623       auto builtin =
3624           reinterpret_cast<TfLiteMulParams*>(mapping_args.node->builtin_data);
3625       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3626       *nn_op_type = ANEURALNETWORKS_MUL;
3627     } break;
3628     case kTfLiteBuiltinAveragePool2d: {
3629       mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
3630       *nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D;
3631     } break;
3632     case kTfLiteBuiltinMaxPool2d: {
3633       mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
3634       *nn_op_type = ANEURALNETWORKS_MAX_POOL_2D;
3635     } break;
3636     case kTfLiteBuiltinL2Pool2d: {
3637       mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
3638       *nn_op_type = ANEURALNETWORKS_L2_POOL_2D;
3639     } break;
3640     case kTfLiteBuiltinConv2d: {
3641       auto builtin =
3642           reinterpret_cast<TfLiteConvParams*>(mapping_args.node->builtin_data);
3643       mapping_args.builder->AddScalarInt32Operand(builtin->padding);
3644       mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
3645       mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
3646       const int input_id = mapping_args.node->inputs->data[/*kInputTensor*/ 0];
3647       const int filter_id =
3648           mapping_args.node->inputs->data[/*kWeightsTensor*/ 1];
3649       const auto& input_tensor = context->tensors[input_id];
3650       const auto& filter_tensor = context->tensors[filter_id];
3651       auto is_grouped_conv = false;
3652       // Only check grouped convolution if input and filter shapes are propagated.
3653       if (input_tensor.dims->size != 0 && filter_tensor.dims->size != 0) {
3654         is_grouped_conv =
3655             input_tensor.dims->data[3] != filter_tensor.dims->data[3];
3656       }
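      // For grouped convolution, the number of groups (input channels divided
      // by filter channels) is passed as an extra scalar operand.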
3657       if (is_grouped_conv) {
3658         mapping_args.builder->AddScalarInt32Operand(
3659             input_tensor.dims->data[3] / filter_tensor.dims->data[3]);
3660       }
3661       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3662       // NNAPI has supported dilated Conv2D since NNAPI 1.2.
3663       if (builtin->dilation_width_factor != 1 ||
3664           builtin->dilation_height_factor != 1) {
3665         mapping_args.builder->AddScalarBoolOperand(false);  // Use NHWC format
3666         mapping_args.builder->AddScalarInt32Operand(
3667             builtin->dilation_width_factor);
3668         mapping_args.builder->AddScalarInt32Operand(
3669             builtin->dilation_height_factor);
3670       }
3671       if (is_grouped_conv) {
3672         *nn_op_type = ANEURALNETWORKS_GROUPED_CONV_2D;
3673       } else {
3674         *nn_op_type = ANEURALNETWORKS_CONV_2D;
3675       }
3676     } break;
3677     case kTfLiteBuiltinDepthwiseConv2d: {
3678       auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(
3679           mapping_args.node->builtin_data);
3680       mapping_args.builder->AddScalarInt32Operand(builtin->padding);
3681       mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
3682       mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
3683       mapping_args.builder->AddScalarInt32Operand(builtin->depth_multiplier);
3684       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3685       if (builtin->dilation_width_factor != 1 ||
3686           builtin->dilation_height_factor != 1) {
3687         mapping_args.builder->AddScalarBoolOperand(false);  // Use NHWC format.
3688         mapping_args.builder->AddScalarInt32Operand(
3689             builtin->dilation_width_factor);
3690         mapping_args.builder->AddScalarInt32Operand(
3691             builtin->dilation_height_factor);
3692       }
3693       *nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D;
3694     } break;
3695     case kTfLiteBuiltinFullyConnected: {
3696       const bool is_bias_present =
3697           mapping_args.node->inputs->size == 3 &&
3698           mapping_args.node->inputs->data[2] != kTfLiteOptionalTensor;
3699       if (!is_bias_present) {
3700         const int input_tensor_id =
3701             mapping_args.node->inputs->data[/*kInputTensor*/ 0];
3702         const int filter_tensor_id =
3703             mapping_args.node->inputs->data[/*kWeightsTensor*/ 1];
3704         const int num_units =
3705             mapping_args.context->tensors[filter_tensor_id].dims->data[0];
3706         add_zero_bias(input_tensor_id, filter_tensor_id, num_units);
3707       }
3708       auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(
3709           mapping_args.node->builtin_data);
3710       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3711       *nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED;
3712     } break;
3713     case kTfLiteBuiltinHardSwish: {
3714       *nn_op_type = ANEURALNETWORKS_HARD_SWISH;
3715     } break;
3716     case kTfLiteBuiltinSoftmax: {
3717       auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>(
3718           mapping_args.node->builtin_data);
3719       mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
3720       // The optional scalar specifying the axis the softmax is applied
3721       // along is not added; it defaults to -1.
3722       *nn_op_type = ANEURALNETWORKS_SOFTMAX;
3723     } break;
3724     case kTfLiteBuiltinReshape: {
3725       if (mapping_args.node->inputs->size == 1) {
3726         // If there is no new_shape tensor, construct the new shape from params.
3727         auto* params = reinterpret_cast<TfLiteReshapeParams*>(
3728             mapping_args.node->builtin_data);
3729         int num_dimensions = params->num_dimensions;
3730         std::vector<int32_t> output_shape(num_dimensions);
3731         for (int i = 0; i < num_dimensions; ++i) {
3732           output_shape[i] = params->shape[i];
3733         }
3734         mapping_args.builder->AddVectorInt32Operand(
3735             output_shape.data(), static_cast<uint32_t>(num_dimensions));
3736       }
3737       *nn_op_type = ANEURALNETWORKS_RESHAPE;
3738     } break;
3739     case kTfLiteBuiltinResizeBilinear: {
3740       const int output_id = mapping_args.node->outputs->data[0];
3741       auto& output = mapping_args.context->tensors[output_id];
3742       const int output_height = output.dims->data[1];
3743       const int output_width = output.dims->data[2];
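      // The output width and height are passed to NNAPI as scalars read from
      // the output tensor's shape; the layout and align_corners /
      // half_pixel_centers operands are only added when one of the flags is
      // set.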
3744       mapping_args.builder->AddScalarInt32Operand(output_width);
3745       mapping_args.builder->AddScalarInt32Operand(output_height);
3746       auto builtin = reinterpret_cast<TfLiteResizeBilinearParams*>(
3747           mapping_args.node->builtin_data);
3748       if (builtin->align_corners == true ||
3749           builtin->half_pixel_centers == true) {
3750         mapping_args.builder->AddScalarBoolOperand(false);  // Use NHWC format
3751         mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
3752         mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
3753       }
3754       *nn_op_type = ANEURALNETWORKS_RESIZE_BILINEAR;
3755     } break;
3756     case kTfLiteBuiltinResizeNearestNeighbor: {
3757       const TfLiteTensor& new_shape =
3758           mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
3759       // NNAPI uses scalar inputs for height and width.
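      // The TFLite size tensor is laid out as [height, width]; the width
      // (data.i32[1]) is added before the height (data.i32[0]).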
3760       mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[1]);
3761       mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[0]);
3762       mapping_args.builder->AddScalarBoolOperand(false);  // Use NHWC format
3763       auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
3764           mapping_args.node->builtin_data);
3765       if (builtin->align_corners == true ||
3766           builtin->half_pixel_centers == true) {
3767         mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
3768         mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
3769       }
3770       *nn_op_type = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR;
3771     } break;
3772     case kTfLiteBuiltinSqueeze: {
3773       auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(
3774           mapping_args.node->builtin_data);
3775       // Note that we add the squeeze dimensions even if the dimensions
3776       // were unspecified (empty), as NNAPI requires the operand.
3777       mapping_args.builder->AddVectorInt32Operand(
3778           builtin->num_squeeze_dims ? builtin->squeeze_dims : nullptr,
3779           static_cast<uint32_t>(builtin->num_squeeze_dims));
3780       *nn_op_type = ANEURALNETWORKS_SQUEEZE;
3781     } break;
3782     case kTfLiteBuiltinUnidirectionalSequenceLstm: {
3783       auto builtin = reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>(
3784           mapping_args.node->builtin_data);
3785       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3786       mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
3787       mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
3788       mapping_args.builder->AddScalarBoolOperand(builtin->time_major);
3789       const bool hybrid_op = IsHybridOperator(
3790           mapping_args.context, kTfLiteBuiltinUnidirectionalSequenceLstm,
3791           mapping_args.node);
3792       if (mapping_args.node->inputs->size == 24) {
3793         // Add layer normalization tensors if they are provided.
3794         for (int i = 20; i < 24; ++i) {
3795           const int input_index = mapping_args.node->inputs->data[i];
3796           if (input_index != kTfLiteOptionalTensor) {
3797             mapping_args.builder->AddTensorInput(input_index, hybrid_op);
3798           } else {
3799             mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3800           }
3801         }
3802       } else {
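        // This variant carries no layer normalization tensors, so four empty
        // operands are added as placeholders to keep the NNAPI operand list
        // complete.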
3803         for (int i = 0; i < 4; ++i) {
3804           mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3805         }
3806       }
3807 
3808       *nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM;
3809     } break;
3810     case kTfLiteBuiltinL2Normalization: {
3811       *nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION;
3812     } break;
3813     case kTfLiteBuiltinLocalResponseNormalization: {
3814       auto builtin = reinterpret_cast<TfLiteLocalResponseNormParams*>(
3815           mapping_args.node->builtin_data);
3816       mapping_args.builder->AddScalarInt32Operand(builtin->radius);
3817       mapping_args.builder->AddScalarFloat32Operand(builtin->bias);
3818       mapping_args.builder->AddScalarFloat32Operand(builtin->alpha);
3819       mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
3820       *nn_op_type = ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION;
3821     } break;
3822     case kTfLiteBuiltinLshProjection: {
3823       auto builtin = reinterpret_cast<TfLiteLSHProjectionParams*>(
3824           mapping_args.node->builtin_data);
3825       int type = builtin->type;
3826       // In Android Q+, NNAPI uses 3 to denote
3827       // kTfLiteLshProjectionSparse.
3828       const int kNNAPILshProjectionSparse = 3;
3829       if (builtin->type == kTfLiteLshProjectionSparse) {
3830         type = kNNAPILshProjectionSparse;
3831         // Add NNAPI null weight operand.
3832         mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3833       }
3834       mapping_args.builder->AddScalarInt32Operand(type);
3835       *nn_op_type = ANEURALNETWORKS_LSH_PROJECTION;
3836     } break;
3837     case kTfLiteBuiltinConcatenation: {
3838       auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(
3839           mapping_args.node->builtin_data);
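      // NNAPI expects a non-negative concatenation axis, so a negative TFLite
      // axis is wrapped by adding the input rank.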
3840       int axis = builtin->axis < 0
3841                      ? mapping_args.context
3842                                ->tensors[mapping_args.node->inputs->data[0]]
3843                                .dims->size +
3844                            builtin->axis
3845                      : builtin->axis;
3846       mapping_args.builder->AddScalarInt32Operand(axis);
3847       *nn_op_type = ANEURALNETWORKS_CONCATENATION;
3848     } break;
3849     case kTfLiteBuiltinDequantize: {
3850       *nn_op_type = ANEURALNETWORKS_DEQUANTIZE;
3851     } break;
3852     case kTfLiteBuiltinFloor: {
3853       *nn_op_type = ANEURALNETWORKS_FLOOR;
3854     } break;
3855     case kTfLiteBuiltinRelu: {
3856       *nn_op_type = ANEURALNETWORKS_RELU;
3857     } break;
3858     case kTfLiteBuiltinReluN1To1: {
3859       *nn_op_type = ANEURALNETWORKS_RELU1;
3860     } break;
3861     case kTfLiteBuiltinRelu6: {
3862       *nn_op_type = ANEURALNETWORKS_RELU6;
3863     } break;
3864     case kTfLiteBuiltinLogistic: {
3865       *nn_op_type = ANEURALNETWORKS_LOGISTIC;
3866     } break;
3867     case kTfLiteBuiltinTanh: {
3868       *nn_op_type = ANEURALNETWORKS_TANH;
3869     } break;
3870     case kTfLiteBuiltinSub: {
3871       auto builtin =
3872           reinterpret_cast<TfLiteSubParams*>(mapping_args.node->builtin_data);
3873       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3874       *nn_op_type = ANEURALNETWORKS_SUB;
3875     } break;
3876     case kTfLiteBuiltinDiv: {
3877       auto builtin =
3878           reinterpret_cast<TfLiteDivParams*>(mapping_args.node->builtin_data);
3879       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3880       *nn_op_type = ANEURALNETWORKS_DIV;
3881     } break;
3882     case kTfLiteBuiltinPad:
3883     case kTfLiteBuiltinPadv2: {
3884       // We want to map to PAD as much as possible since it is more widely
3885       // supported. We map to PAD_V2 only when we need to specify the
3886       // padding value.
3887       if (mapping_args.node->inputs->size == 2) {
3888         *nn_op_type = ANEURALNETWORKS_PAD;
3889       } else {
3890         const int constant_value_id = mapping_args.node->inputs->data[2];
3891         if (constant_value_id == kTfLiteOptionalTensor) {
3892           *nn_op_type = ANEURALNETWORKS_PAD;
3893         } else {
3894           *nn_op_type = ANEURALNETWORKS_PAD_V2;
3895         }
3896       }
3897     } break;
3898     case kTfLiteBuiltinUnidirectionalSequenceRnn: {
3899       auto builtin = reinterpret_cast<TfLiteSequenceRNNParams*>(
3900           mapping_args.node->builtin_data);
3901       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3902       mapping_args.builder->AddScalarInt32Operand(builtin->time_major);
3903       *nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN;
3904     } break;
3905     case kTfLiteBuiltinSpaceToBatchNd: {
3906       *nn_op_type = ANEURALNETWORKS_SPACE_TO_BATCH_ND;
3907     } break;
3908     case kTfLiteBuiltinBatchToSpaceNd: {
3909       *nn_op_type = ANEURALNETWORKS_BATCH_TO_SPACE_ND;
3910     } break;
3911     case kTfLiteBuiltinStridedSlice: {
3912       auto builtin = reinterpret_cast<TfLiteStridedSliceParams*>(
3913           mapping_args.node->builtin_data);
3914       mapping_args.builder->AddScalarInt32Operand(builtin->begin_mask);
3915       mapping_args.builder->AddScalarInt32Operand(builtin->end_mask);
3916       mapping_args.builder->AddScalarInt32Operand(builtin->shrink_axis_mask);
3917       *nn_op_type = ANEURALNETWORKS_STRIDED_SLICE;
3918     } break;
3919     case kTfLiteBuiltinTranspose: {
3920       *nn_op_type = ANEURALNETWORKS_TRANSPOSE;
3921     } break;
3922     case kTfLiteBuiltinAbs: {
3923       *nn_op_type = ANEURALNETWORKS_ABS;
3924     } break;
3925     case kTfLiteBuiltinExp: {
3926       *nn_op_type = ANEURALNETWORKS_EXP;
3927     } break;
3928     case kTfLiteBuiltinLog: {
3929       *nn_op_type = ANEURALNETWORKS_LOG;
3930     } break;
3931     case kTfLiteBuiltinRsqrt: {
3932       *nn_op_type = ANEURALNETWORKS_RSQRT;
3933     } break;
3934     case kTfLiteBuiltinPow: {
3935       *nn_op_type = ANEURALNETWORKS_POW;
3936     } break;
3937     case kTfLiteBuiltinSlice: {
3938       *nn_op_type = ANEURALNETWORKS_SLICE;
3939     } break;
3940     case kTfLiteBuiltinSin: {
3941       *nn_op_type = ANEURALNETWORKS_SIN;
3942     } break;
3943     case kTfLiteBuiltinTransposeConv: {
3944       int input_tensor_flags = 0;
3945       const int input_tensor_id =
3946           mapping_args.node->inputs->data[/*kDataInputTensor*/ 2];
3947       const int weight_tensor_id =
3948           mapping_args.node->inputs->data[/*kWeightsTensor*/ 1];
3949 
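      // The TFLite node stores its inputs as [output_shape, weights, data,
      // (bias)], so they are re-added below as data, weights, bias, then
      // output_shape.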
3950       // Transpose convolution doesn't have hybrid variation.
3951       // Transpose convolution doesn't have a hybrid variant.
3952 
3953       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
3954         mapping_args.builder->AddTensorInput(
3955             input_tensor_id, hybrid_op,
3956             input_tensor_flags | NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED);
3957 
3958       } else {
3959         mapping_args.builder->AddTensorInput(
3960             input_tensor_id, hybrid_op,
3961             input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION);
3962       }
3963       // Transpose convolution uses per-channel quantization with int8 inputs
3964       // even if the number of channels in quantization parameters is equal to 1
3965       // (as opposed to conv2d, which uses per-tensor quantization in this
3966       // case).
3967       mapping_args.builder->AddTensorInput(
3968           weight_tensor_id, hybrid_op,
3969           input_tensor_flags | NN_TENSOR_FLAG_FORCE_PER_CHANNEL);
3970 
3971       const bool is_bias_present =
3972           mapping_args.node->inputs->size == 4 &&
3973           mapping_args.node->inputs->data[/*kBiasTensor*/ 3] !=
3974               kTfLiteOptionalTensor;
3975 
3976       if (is_bias_present) {
3977         mapping_args.builder->AddTensorInput(
3978             mapping_args.node->inputs->data[/*kBiasTensor*/ 3], hybrid_op);
3979       } else {
3980         const TfLiteTensor& output_shape =
3981             mapping_args.context->tensors[mapping_args.node->inputs
3982                                               ->data[/*kOutputShapeTensor*/ 0]];
3983         const int output_depth = output_shape.data.i32[3];
3984         add_zero_bias(input_tensor_id, weight_tensor_id, output_depth);
3985       }
3986       mapping_args.builder->AddTensorInput(
3987           mapping_args.node->inputs->data[/*kOutputShapeTensor*/ 0], hybrid_op);
3988 
3989       auto builtin = reinterpret_cast<TfLiteTransposeConvParams*>(
3990           mapping_args.node->builtin_data);
3991       mapping_args.builder->AddScalarInt32Operand(builtin->padding);
3992       mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
3993       mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
3994       mapping_args.builder->AddScalarInt32Operand(
3995           /*ANEURALNETWORKS_FUSED_NONE*/ 0);
3996       // Use NHWC layout for input and output.
3997       mapping_args.builder->AddScalarBoolOperand(false);
3998       *nn_op_type = ANEURALNETWORKS_TRANSPOSE_CONV;
3999     } break;
4000     case kTfLiteBuiltinSqrt: {
4001       *nn_op_type = ANEURALNETWORKS_SQRT;
4002     } break;
4003     case kTfLiteBuiltinRnn: {
4004       // NNAPI needs both state_in and state_out.
4005       int ann_index;
4006       mapping_args.builder->AddStateFloat32Tensor(
4007           mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4],
4008           &ann_index);
4009       mapping_args.model_state_outputs->push_back(ann_index);
4010       mapping_args.model_state_tfl_inputs->push_back(
4011           mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4]);
4012       auto builtin =
4013           reinterpret_cast<TfLiteRNNParams*>(mapping_args.node->builtin_data);
4014       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
4015       *nn_op_type = ANEURALNETWORKS_RNN;
4016     } break;
4017     case kTfLiteBuiltinSpaceToDepth: {
4018       auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(
4019           mapping_args.node->builtin_data);
4020       mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
4021       *nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
4022     } break;
4023     case kTfLiteBuiltinSvdf: {
4024       // NNAPI needs both state_in and state_out.
4025       int ann_index;
4026       mapping_args.builder->AddStateFloat32Tensor(
4027           mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4],
4028           &ann_index);
4029       mapping_args.model_state_outputs->push_back(ann_index);
4030       mapping_args.model_state_tfl_inputs->push_back(
4031           mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4]);
4032 
4033       auto builtin =
4034           reinterpret_cast<TfLiteSVDFParams*>(mapping_args.node->builtin_data);
4035       mapping_args.builder->AddScalarInt32Operand(builtin->rank);
4036       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
4037       *nn_op_type = ANEURALNETWORKS_SVDF;
4038     } break;
4039     case kTfLiteBuiltinLstm: {
4040       if (isLstmBasicKernel(mapping_args.node)) {
4041         const auto output_dims =
4042             mapping_args.context->tensors[mapping_args.node->outputs->data[1]]
4043                 .dims;
4044 
4045         // Inputs kInputData
4046         mapping_args.builder->AddTensorInput(
4047             mapping_args.node->inputs->data[0 /* kInputData */],
4048             /* hybrid_op */ false,
4049             /* scalar_as_tensor */ false);
4050 
4051         // The 8 weight tensors are set by decomposing the
4052         // kInputWeights param.
4053         const auto weight_tensor =
4054             mapping_args.context->tensors[mapping_args.node->inputs
4055                                               ->data[2 /* kInputWeights */]];
4056 
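        // The combined TFLite weight tensor is split into the four
        // input-to-gate and four recurrent-to-gate submatrices that
        // QUANTIZED_16BIT_LSTM takes as separate operands.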
4057         std::vector<uint8_t> recurrent_to_input;
4058         std::vector<uint8_t> input_to_input;
4059         std::vector<uint8_t> recurrent_to_cell;
4060         std::vector<uint8_t> input_to_cell;
4061         std::vector<uint8_t> recurrent_to_forget;
4062         std::vector<uint8_t> input_to_forget;
4063         std::vector<uint8_t> recurrent_to_output;
4064         std::vector<uint8_t> input_to_output;
4065         tflite::delegate::nnapi::DecomposeQuantLstmWeightsTensor(
4066             weight_tensor.data.uint8, weight_tensor.dims, &recurrent_to_input,
4067             &input_to_input, &recurrent_to_cell, &input_to_cell,
4068             &recurrent_to_forget, &input_to_forget, &recurrent_to_output,
4069             &input_to_output);
4070 
4071         TfLiteIntArray* recurrent_weight_dims = TfLiteIntArrayCreate(2);
4072         TfLiteIntArray* input_weight_dims = TfLiteIntArrayCreate(2);
4073         tflite::delegate::nnapi::SetWeightSubmatrixDims(
4074             weight_tensor.dims, recurrent_weight_dims, input_weight_dims);
4075 
4076         int new_tensor_index = -1;
4077 
4078         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4079             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4080             input_weight_dims, input_to_input, weight_tensor.params,
4081             &new_tensor_index);
4082 
4083         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4084             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4085             input_weight_dims, input_to_forget, weight_tensor.params,
4086             &new_tensor_index);
4087 
4088         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4089             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4090             input_weight_dims, input_to_cell, weight_tensor.params,
4091             &new_tensor_index);
4092 
4093         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4094             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4095             input_weight_dims, input_to_output, weight_tensor.params,
4096             &new_tensor_index);
4097 
4098         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4099             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4100             recurrent_weight_dims, recurrent_to_input, weight_tensor.params,
4101             &new_tensor_index);
4102 
4103         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4104             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4105             recurrent_weight_dims, recurrent_to_forget, weight_tensor.params,
4106             &new_tensor_index);
4107 
4108         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4109             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4110             recurrent_weight_dims, recurrent_to_cell, weight_tensor.params,
4111             &new_tensor_index);
4112 
4113         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4114             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4115             recurrent_weight_dims, recurrent_to_output, weight_tensor.params,
4116             &new_tensor_index);
4117 
4118         TfLiteIntArrayFree(input_weight_dims);
4119         TfLiteIntArrayFree(recurrent_weight_dims);
4120 
4121         // Biases have to be split in four.
4122         const auto bias_size = output_dims->data[1];
4123         const TfLiteTensor& biases_tensor =
4124             mapping_args.context->tensors[mapping_args.node->inputs
4125                                               ->data[3 /* kInputBiases */]];
4126 
4127         std::vector<int32_t> input_bias;
4128         std::vector<int32_t> cell_bias;
4129         std::vector<int32_t> forget_bias;
4130         std::vector<int32_t> output_bias;
4131         delegate::nnapi::DecomposeBiasTensor(biases_tensor.data.i32, bias_size,
4132                                              &input_bias, &cell_bias,
4133                                              &forget_bias, &output_bias);
4134 
4135         int input_bias_tensor = -1;
4136         mapping_args.builder->AddNewInputConstantTensor<int32_t>(
4137             ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, input_bias,
4138             biases_tensor.params, &input_bias_tensor);
4139         int forget_bias_tensor = -1;
4140         mapping_args.builder->AddNewInputConstantTensor(
4141             ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
4142             forget_bias, biases_tensor.params, &forget_bias_tensor);
4143         int cell_gate_bias_tensor = -1;
4144         mapping_args.builder->AddNewInputConstantTensor(
4145             ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, cell_bias,
4146             biases_tensor.params, &cell_gate_bias_tensor);
4147         int output_gate_bias_tensor = -1;
4148         mapping_args.builder->AddNewInputConstantTensor(
4149             ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
4150             output_bias, biases_tensor.params, &output_gate_bias_tensor);
4151 
4152         mapping_args.builder->AddTensorInput(
4153             mapping_args.node->inputs->data[4 /* kInputPrevState */],
4154             /* hybrid_op */ false,
4155             /* scalar_as_tensor */ false);
4156 
4157         // kInputPrevActivation
4158         mapping_args.builder->AddTensorInput(
4159             mapping_args.node->inputs->data[1 /* kInputPrevActivation */],
4160             /* hybrid_op */ false,
4161             /* scalar_as_tensor */ false);
4162 
4163         // Configuring the copy from the activation, state outputs
4164         // Configure the copy from the activation and state outputs
4165         // to their associated inputs.
4166             mapping_args.node->outputs->data[0 /*kOutputActivation*/],
4167             mapping_args.node->inputs->data[1 /*kInputPrevActivation*/]));
4168 
4169         mapping_args.feedback_loops->push_back(std::make_tuple(
4170             mapping_args.node->outputs->data[1 /*kOutputState*/],
4171             mapping_args.node->inputs->data[4 /*kInputPrevState*/]));
4172 
4173         // OUTPUTS
4174         // Set only the first two since the remaining ones are
4175         // ignored by NNAPI.
4176         mapping_args.builder->AddTensorOutput(
4177             mapping_args.node->outputs->data[1 /* kOutputState */], 0);
4178 
4179         mapping_args.builder->AddTensorOutput(
4180             mapping_args.node->outputs->data[0 /* kOutputActivation */], 0);
4181 
4182         *nn_op_type = ANEURALNETWORKS_QUANTIZED_16BIT_LSTM;
4183       } else {
4184         auto builtin = reinterpret_cast<TfLiteLSTMParams*>(
4185             mapping_args.node->builtin_data);
4186         mapping_args.builder->AddScalarInt32Operand(builtin->activation);
4187         mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
4188         mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
4189 
4190         // The current NNAPI implementation requires the scratch_buffer
4191         // as an output.
4192         mapping_args.builder->AddAdditionalFloat32OutputTensor(2);
4193 
4194         // NNAPI needs both state_in and state_out for cell_state and
4195         // output_state.
4196         int ann_index;
4197         mapping_args.builder->AddStateFloat32Tensor(
4198             mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 18],
4199             &ann_index);
4200         mapping_args.model_state_outputs->push_back(ann_index);
4201         mapping_args.model_state_tfl_inputs->push_back(
4202             mapping_args.node->inputs
4203                 ->data[/*kInputActivationStateTensor*/ 18]);
4204         mapping_args.builder->AddStateFloat32Tensor(
4205             mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19],
4206             &ann_index);
4207         mapping_args.model_state_outputs->push_back(ann_index);
4208         mapping_args.model_state_tfl_inputs->push_back(
4209             mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19]);
4210 
4211         const bool hybrid_op = IsHybridOperator(
4212             mapping_args.context, kTfLiteBuiltinLstm, mapping_args.node);
4213 
4214         if (mapping_args.node->inputs->size == 24) {
4215           for (int i = 20; i < 24; ++i) {
4216             const auto input_index = mapping_args.node->inputs->data[i];
4217             if (input_index != kTfLiteOptionalTensor) {
4218               mapping_args.builder->AddTensorInput(input_index, hybrid_op);
4219             } else {
4220               mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
4221             }
4222           }
4223         }
4224 
4225         *nn_op_type = ANEURALNETWORKS_LSTM;
4226       }
4227     } break;
4228     case kTfLiteBuiltinMean: {
4229       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
4230           mapping_args.node->builtin_data);
4231       int32_t keep_dims = 0;
4232       if (builtin->keep_dims) keep_dims = 1;
4233       mapping_args.builder->AddScalarInt32Operand(keep_dims);
4234       *nn_op_type = ANEURALNETWORKS_MEAN;
4235     } break;
4236     case kTfLiteBuiltinEmbeddingLookup: {
4237       *nn_op_type = ANEURALNETWORKS_EMBEDDING_LOOKUP;
4238     } break;
4239     case kTfLiteBuiltinHashtableLookup: {
4240       *nn_op_type = ANEURALNETWORKS_HASHTABLE_LOOKUP;
4241     } break;
4242     case kTfLiteBuiltinMaximum: {
4243       *nn_op_type = ANEURALNETWORKS_MAXIMUM;
4244     } break;
4245     case kTfLiteBuiltinMinimum: {
4246       *nn_op_type = ANEURALNETWORKS_MINIMUM;
4247     } break;
4248     case kTfLiteBuiltinCast: {
4249       *nn_op_type = ANEURALNETWORKS_CAST;
4250     } break;
4251     case kTfLiteBuiltinLeakyRelu: {
4252       const auto input_type =
4253           mapping_args.context->tensors[mapping_args.node->inputs->data[0]]
4254               .type;
4255       auto builtin = reinterpret_cast<TfLiteLeakyReluParams*>(
4256           mapping_args.node->builtin_data);
4257 
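      // LeakyRelu is lowered to NNAPI's PRELU with a one-element alpha
      // operand that broadcasts over the input. For quantized inputs the
      // stored value is 1 with scale set to builtin->alpha, so the
      // dequantized alpha equals the requested alpha.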
4258       TfLiteTensor alpha_tensor;
4259       alpha_tensor.type = input_type;
4260       alpha_tensor.allocation_type = kTfLiteDynamic;
4261       alpha_tensor.dims = TfLiteIntArrayCreate(1);
4262       alpha_tensor.dims->data[0] = 1;
4263       alpha_tensor.params.zero_point = 0;
4264 
4265       int new_tensor_index = -1;
4266       if (input_type == kTfLiteFloat32) {
4267         alpha_tensor.params.scale = 0;
4268         std::vector<float> alpha_value = {builtin->alpha};
4269         mapping_args.builder->AddNewInputConstantTensor(
4270             ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, alpha_tensor.dims,
4271             alpha_value, alpha_tensor.params, &new_tensor_index);
4272       } else if (input_type == kTfLiteInt8 &&
4273                  android_sdk_version >= kMinSdkVersionForNNAPI13) {
4274         alpha_tensor.params.scale = builtin->alpha;
4275         std::vector<int8_t> alpha_value = {1};
4276         mapping_args.builder->AddNewInputConstantTensor(
4277             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, kTfLiteInt8,
4278             alpha_tensor.dims, alpha_value, alpha_tensor.params,
4279             &new_tensor_index);
4280       } else {
4281         alpha_tensor.params.scale = builtin->alpha;
4282         std::vector<uint8_t> alpha_value = {1};
4283         mapping_args.builder->AddNewInputConstantTensor(
4284             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4285             alpha_tensor.dims, alpha_value, alpha_tensor.params,
4286             &new_tensor_index);
4287       }
4288 
4289       *nn_op_type = ANEURALNETWORKS_PRELU;
4290     } break;
4291     case kTfLiteBuiltinPrelu: {
4292       *nn_op_type = ANEURALNETWORKS_PRELU;
4293     } break;
4294     case kTfLiteBuiltinTile: {
4295       *nn_op_type = ANEURALNETWORKS_TILE;
4296     } break;
4297     case kTfLiteBuiltinLogicalOr: {
4298       *nn_op_type = ANEURALNETWORKS_LOGICAL_OR;
4299     } break;
4300     case kTfLiteBuiltinLogicalAnd: {
4301       *nn_op_type = ANEURALNETWORKS_LOGICAL_AND;
4302     } break;
4303     case kTfLiteBuiltinLogicalNot: {
4304       *nn_op_type = ANEURALNETWORKS_LOGICAL_NOT;
4305     } break;
4306     case kTfLiteBuiltinLess: {
4307       *nn_op_type = ANEURALNETWORKS_LESS;
4308     } break;
4309     case kTfLiteBuiltinLessEqual: {
4310       *nn_op_type = ANEURALNETWORKS_LESS_EQUAL;
4311     } break;
4312     case kTfLiteBuiltinGreater: {
4313       *nn_op_type = ANEURALNETWORKS_GREATER;
4314     } break;
4315     case kTfLiteBuiltinGreaterEqual: {
4316       *nn_op_type = ANEURALNETWORKS_GREATER_EQUAL;
4317     } break;
4318     case kTfLiteBuiltinEqual: {
4319       *nn_op_type = ANEURALNETWORKS_EQUAL;
4320     } break;
4321     case kTfLiteBuiltinNotEqual: {
4322       *nn_op_type = ANEURALNETWORKS_NOT_EQUAL;
4323     } break;
4324     case kTfLiteBuiltinNeg: {
4325       *nn_op_type = ANEURALNETWORKS_NEG;
4326     } break;
4327     case kTfLiteBuiltinTopkV2: {
4328       const TfLiteTensor& k_param =
4329           mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
4330       mapping_args.builder->AddScalarInt32Operand(*k_param.data.i32);
4331       *nn_op_type = ANEURALNETWORKS_TOPK_V2;
4332     } break;
4333     case kTfLiteBuiltinSelect: {
4334       *nn_op_type = ANEURALNETWORKS_SELECT;
4335     } break;
4336     case kTfLiteBuiltinGather: {
4337       auto builtin = reinterpret_cast<TfLiteGatherParams*>(
4338           mapping_args.node->builtin_data);
4339       mapping_args.builder->AddScalarInt32Operand(builtin->axis);
4340       mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[1],
4341                                            /* hybrid_op */ false,
4342                                            /* tensor_flags */ 0);
4343       *nn_op_type = ANEURALNETWORKS_GATHER;
4344     } break;
4345     case kTfLiteBuiltinBidirectionalSequenceLstm: {
4346       auto builtin = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
4347           mapping_args.node->builtin_data);
4348       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
4349       mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
4350       mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
4351       mapping_args.builder->AddScalarBoolOperand(builtin->merge_outputs);
4352       mapping_args.builder->AddScalarBoolOperand(builtin->time_major);
4353       // TF Lite doesn't support layer normalization in bidirectional
4354       // sequence LSTM, so we insert optional tensors for NNAPI.
4355       for (int i = 0; i < 8; ++i) {
4356         mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
4357       }
4358       *nn_op_type = ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM;
4359     } break;
4360     case kTfLiteBuiltinExpandDims: {
4361       const TfLiteTensor& axis_param =
4362           mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
4363       mapping_args.builder->AddScalarInt32Operand(*axis_param.data.i32);
4364       *nn_op_type = ANEURALNETWORKS_EXPAND_DIMS;
4365     } break;
4366     case kTfLiteBuiltinSplit: {
4367       const TfLiteTensor& axis =
4368           mapping_args.context->tensors[mapping_args.node->inputs->data[0]];
4369       auto builtin =
4370           reinterpret_cast<TfLiteSplitParams*>(mapping_args.node->builtin_data);
4371       mapping_args.builder->AddScalarInt32Operand(*axis.data.i32);
4372       mapping_args.builder->AddScalarInt32Operand(builtin->num_splits);
4373       *nn_op_type = ANEURALNETWORKS_SPLIT;
4374     } break;
4375     case kTfLiteBuiltinLogSoftmax: {
4376       // Scaling and axis are hardcoded in TFLite to 1 and -1,
4377       // respectively.
4378       mapping_args.builder->AddScalarFloat32Operand(1);
4379       mapping_args.builder->AddScalarInt32Operand(-1);
4380       *nn_op_type = ANEURALNETWORKS_LOG_SOFTMAX;
4381     } break;
4382     case kTfLiteBuiltinQuantize: {
4383       auto input_index = mapping_args.node->inputs->data[0];
4384       // NNAPI doesn't support requantization, only quantization from
4385       // float. If the input is already quantized, dequantize it by
4386       // adding a Dequantize node before this one.
4387       if (IsQuantized(mapping_args.context->tensors[input_index].type)) {
4388         mapping_args.builder->AddDequantize(0, input_index, kTfLiteFloat32,
4389                                             mapping_args.node_index);
4390       }
4391 
4392       *nn_op_type = ANEURALNETWORKS_QUANTIZE;
4393     } break;
4394     case kTfLiteBuiltinReduceAny: {
4395       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
4396           mapping_args.node->builtin_data);
4397       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
4398       *nn_op_type = ANEURALNETWORKS_REDUCE_ANY;
4399     } break;
4400     case kTfLiteBuiltinReduceMin: {
4401       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
4402           mapping_args.node->builtin_data);
4403       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
4404       *nn_op_type = ANEURALNETWORKS_REDUCE_MIN;
4405     } break;
4406     case kTfLiteBuiltinReduceMax: {
4407       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
4408           mapping_args.node->builtin_data);
4409       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
4410       *nn_op_type = ANEURALNETWORKS_REDUCE_MAX;
4411     } break;
4412     case kTfLiteBuiltinDepthToSpace: {
4413       auto builtin = reinterpret_cast<TfLiteDepthToSpaceParams*>(
4414           mapping_args.node->builtin_data);
4415       mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
4416       *nn_op_type = ANEURALNETWORKS_DEPTH_TO_SPACE;
4417     } break;
4418     case kTfLiteBuiltinReduceProd: {
4419       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
4420           mapping_args.node->builtin_data);
4421       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
4422       *nn_op_type = ANEURALNETWORKS_REDUCE_PROD;
4423     } break;
4424     case kTfLiteBuiltinSum: {
4425       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
4426           mapping_args.node->builtin_data);
4427       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
4428       *nn_op_type = ANEURALNETWORKS_REDUCE_SUM;
4429     } break;
4430     case kTfLiteBuiltinElu: {
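      // TFLite's ELU has no alpha parameter, so the alpha operand passed to
      // NNAPI is fixed at 1.0.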
4431       mapping_args.builder->AddScalarFloat32Operand(1.0);
4432       *nn_op_type = ANEURALNETWORKS_ELU;
4433     } break;
4434     case kTfLiteBuiltinFill: {
4435       *nn_op_type = ANEURALNETWORKS_FILL;
4436     } break;
4437     case kTfLiteBuiltinBatchMatmul: {
4438       auto builtin = reinterpret_cast<TfLiteBatchMatMulParams*>(
4439           mapping_args.node->builtin_data);
4440       mapping_args.builder->AddScalarBoolOperand(builtin->adj_x);
4441       mapping_args.builder->AddScalarBoolOperand(builtin->adj_y);
4442       *nn_op_type = ANEURALNETWORKS_BATCH_MATMUL;
4443     } break;
4444     case kTfLiteBuiltinPack: {
4445       *nn_op_type = ANEURALNETWORKS_PACK;
4446     } break;
4447     case kTfLiteBuiltinMirrorPad: {
4448       auto builtin = reinterpret_cast<TfLiteMirrorPaddingParams*>(
4449           mapping_args.node->builtin_data);
4450       mapping_args.builder->AddScalarInt32Operand(builtin->mode);
4451       *nn_op_type = ANEURALNETWORKS_MIRROR_PAD;
4452     } break;
4453     case kTfLiteBuiltinReverseV2: {
4454       *nn_op_type = ANEURALNETWORKS_REVERSE;
4455     } break;
4456     default:
4457       // All other operators are not mapped.
4458       return kTfLiteError;
4459   }
4460   return kTfLiteOk;
4461 }
4462 
4463 // Initialize the kernel (a NN model).
4464 TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
4465                                        const TfLiteDelegateParams* params,
4466                                        int* nnapi_errno) {
4467   for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
4468     nodes_.push_back(node_index);
4469   }
4470 
4471   // Initialize densify map and dequantize map.
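  // Both maps are indexed by tensor id; -1 means the tensor is not produced
  // by a Densify or non-constant Dequantize node in this partition.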
4472   densify_output_to_node_mapping_ = std::vector<int>(context->tensors_size, -1);
4473   non_const_dequantize_output_to_node_mapping_ =
4474       std::vector<int>(context->tensors_size, -1);
4475   const auto delegate_options =
4476       StatefulNnApiDelegate::GetOptions(params->delegate);
4477   if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
4478       ShouldUseTargetDevices(delegate_options, nnapi_)) {
4479     TF_LITE_ENSURE_STATUS(GetTargetDevices(context, params->delegate, nnapi_,
4480                                            nnapi_errno, &nnapi_devices_));
4481 
4482     if (nnapi_devices_.empty()) {
4483       TF_LITE_KERNEL_LOG(
4484           context, "NNAPI delegate requested but no accelerators available.");
4485       return kTfLiteError;
4486     }
4487 
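    // If the NNAPI Support Library implements the diagnostic hooks, register
    // callbacks so compilation and execution info is logged once via
    // LogCompilationInfoOnce / LogExecutionInfoOnce.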
4488     if (nnapi_->SL_ANeuralNetworksDiagnostic_registerCallbacks != nullptr) {
4489       nnapi_->SL_ANeuralNetworksDiagnostic_registerCallbacks(
4490           [](const void* nnapi,
4491              const ANeuralNetworksDiagnosticCompilationInfo* info) {
4492             return LogCompilationInfoOnce(static_cast<const NnApi*>(nnapi),
4493                                           info);
4494           },
4495           [](const void* nnapi,
4496              const ANeuralNetworksDiagnosticExecutionInfo* info) {
4497             return LogExecutionInfoOnce(static_cast<const NnApi*>(nnapi), info);
4498           },
4499           const_cast<NnApi*>(nnapi_));
4500       TFLITE_LOG_PROD(TFLITE_LOG_INFO,
4501                       "Registered diagnostics callbacks in NNAPI SL driver "
4502                       "SL_ANeuralNetworksDiagnostic_registerCallbacks.");
4503     } else {
4504       TFLITE_LOG_PROD(TFLITE_LOG_WARNING,
4505                       "NNAPI SL driver did not implement "
4506                       "SL_ANeuralNetworksDiagnostic_registerCallbacks!");
4507     }
4508   }
4509 
4510   if (nnapi_->android_sdk_version < kMinSdkVersionForNNAPI12 &&
4511       delegate_options.allow_dynamic_dimensions &&
4512       delegate_options.vendor_plugin != nullptr) {
4513     TF_LITE_KERNEL_LOG(context,
4514                        "Models with dynamic dimensions and a vendor plugin "
4515                        "are not supported before NNAPI 1.2 (API level 29).");
4516     return kTfLiteError;
4517   }
4518 
4519   // Mark the handle backed tensors.
4520   tensor_memory_map_ =
4521       &StatefulNnApiDelegate::GetTensorMemoryMap(params->delegate);
4522 
4523   // Initialize tensor max size hints.
4524   tensor_max_size_hints_.resize(context->tensors_size, 0);
4525   for (const auto it : delegate_options.tensor_max_size_hints) {
4526     auto tensor_index = it.first;
4527     // Skip invalid or optional entries.
4528     if (tensor_index >= context->tensors_size || tensor_index < 0) continue;
4529     // Skip tensors with static shape.
4530     if (!HasUnspecifiedDimension(&context->tensors[tensor_index])) continue;
4531     auto max_size_hint = it.second;
4532     tensor_max_size_hints_[tensor_index] = max_size_hint;
4533   }
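  // These hints are only consulted in Invoke(): they bound the shared output
  // memory pool for dynamic output tensors whose shape cannot be inferred
  // ahead of time (e.g. when a vendor plugin is used with dynamic dimensions).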
4534 
4535   if (!nn_model_) {
4536     ANeuralNetworksModel* model = nullptr;
4537     RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
4538                                     nnapi_->ANeuralNetworksModel_create(&model),
4539                                     "creating NNAPI model", nnapi_errno);
4540     nn_model_.reset(model);
4541 
4542     TF_LITE_ENSURE_STATUS(BuildGraph(context, delegate_options,
4543                                      params->input_tensors,
4544                                      params->output_tensors, nnapi_errno));
4545   }
4546 
4547   auto* cache = StatefulNnApiDelegate::GetCache(params->delegate);
4548   if (cache) {
4549     // Compilation caching is enabled; construct the uint8 token.
4550     uint64_t token_parts[4];
4551     // model_token is incorporated into partition_key by TFLite Serialization.
4552     // NNAPI uses a 256-bit key, but we can just tile the unique 64-bit
4553     // fingerprint from TFLite.
4554     auto partition_entry = cache->GetEntryForKernel(kNnapiId, context, params);
4555     token_parts[0] = partition_entry.GetFingerprint();
4556     token_parts[1] = partition_entry.GetFingerprint();
4557     token_parts[2] = partition_entry.GetFingerprint();
4558     token_parts[3] = partition_entry.GetFingerprint();
4559     // TODO(b/172238515): get token size from header instead of hardcoding.
4560     // Allocate one extra 'null' byte to avoid bugs with backends that might
4561     // be doing strlen() on the token ptr.
4562     std::vector<uint8_t> nnapi_cache_token(33, 0);
4563     // Copy the token bits.
4564     uint8_t* p = reinterpret_cast<uint8_t*>(token_parts);
4565     for (int i = 0; i < 4 * sizeof(uint64_t); i++) {
4566       nnapi_cache_token[i] = p[i];
4567     }
4568 
4569     nn_compilation_cache_token_ = nnapi_cache_token;
4570   }
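  // The token built above is later passed to
  // ANeuralNetworksCompilation_setCaching() in Prepare(). As a rough sketch
  // (configuration details may vary), callers typically enable this path by
  // setting StatefulNnApiDelegate::Options::cache_dir and enabling TFLite
  // delegate serialization, which supplies the per-partition fingerprint
  // tiled into the token here.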
4571 
4572   nn_execution_cache_.SetMaxCacheSize(
4573       delegate_options.max_execution_cache_size);
4574 
4575   initialised_ = true;
4576 
4577   return kTfLiteOk;
4578 }
4579 
4580 TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context,
4581                                           TfLiteNode* node, int* nnapi_errno) {
4582   if (!initialised_) {
4583     return kTfLiteError;
4584   }
4585 
4586   const auto delegate_options =
4587       StatefulNnApiDelegate::GetOptions(node->delegate);
4588   if (nn_compilation_) {
4589     return kTfLiteOk;
4590   }
4591 
4592   ANeuralNetworksCompilation* compilation = nullptr;
4593   if (!nnapi_devices_.empty()) {
4594     // Compile for the selected accelerator.
4595     RETURN_TFLITE_ERROR_IF_NN_ERROR(
4596         context,
4597         nnapi_->ANeuralNetworksCompilation_createForDevices(
4598             nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
4599             &compilation),
4600         "creating NNAPI model for given devices", nnapi_errno);
4601   } else {
4602     // Trying to call ANeuralNetworksCompilation_create when the delegate is
4603     // constructed from a support library would result in a crash.
4604     if (nnapi_->ANeuralNetworksCompilation_create != nullptr) {
4605       RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
4606                                       nnapi_->ANeuralNetworksCompilation_create(
4607                                           nn_model_.get(), &compilation),
4608                                       "creating NNAPI compilation",
4609                                       nnapi_errno);
4610     } else {
4611       TF_LITE_KERNEL_LOG(
4612           context,
4613           "Attempted to call ANeuralNetworksCompilation_create from NNAPI "
4614           "delegate that is constructed from a support library");
4615       return kTfLiteError;
4616     }
4617   }
4618 
4619   auto preference = delegate_options.execution_preference;
4620   if (preference !=
4621       StatefulNnApiDelegate::Options::ExecutionPreference::kUndefined) {
4622     const int preference_result =
4623         nnapi_->ANeuralNetworksCompilation_setPreference(compilation,
4624                                                          preference);
4625     if (preference_result != ANEURALNETWORKS_NO_ERROR) {
4626       nnapi_->ANeuralNetworksCompilation_free(compilation);
4627       compilation = nullptr;
4628     }
4629     RETURN_TFLITE_ERROR_IF_NN_ERROR(context, preference_result,
4630                                     "setting compilation preferences",
4631                                     nnapi_errno);
4632   }
4633 
4634   if (!nn_compilation_cache_token_.empty()) {
4635     const char* cache_dir = delegate_options.cache_dir;
4636     const int set_caching_result =
4637         nnapi_->ANeuralNetworksCompilation_setCaching(
4638             compilation, cache_dir, nn_compilation_cache_token_.data());
4639     if (set_caching_result != ANEURALNETWORKS_NO_ERROR) {
4640       nnapi_->ANeuralNetworksCompilation_free(compilation);
4641       compilation = nullptr;
4642     }
4643     RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result,
4644                                     "configuring NNAPI caching", nnapi_errno);
4645   }
4646   // Set compilation timeout if applicable.
4647   if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
4648     if (delegate_options.max_compilation_timeout_duration_ns > 0) {
4649       RETURN_TFLITE_ERROR_IF_NN_ERROR(
4650           context,
4651           nnapi_->ANeuralNetworksCompilation_setTimeout(
4652               compilation,
4653               delegate_options.max_compilation_timeout_duration_ns),
4654           "setting compilation timeout", nnapi_errno);
4655     }
4656     RETURN_TFLITE_ERROR_IF_NN_ERROR(
4657         context,
4658         nnapi_->ANeuralNetworksCompilation_setPriority(
4659             compilation, delegate_options.execution_priority),
4660         "setting compilation priority", nnapi_errno);
4661   }
4662   if (delegate_options.vendor_compilation_hints && vendor_plugin_) {
4663     TF_LITE_ENSURE_STATUS(vendor_plugin_->ConfigureCompilationHints(
4664         delegate_options.vendor_compilation_hints, compilation));
4665   }
4666   const int finish_result =
4667       nnapi_->ANeuralNetworksCompilation_finish(compilation);
4668   if (finish_result != ANEURALNETWORKS_NO_ERROR) {
4669     nnapi_->ANeuralNetworksCompilation_free(compilation);
4670     compilation = nullptr;
4671   }
4672   RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result,
4673                                   "completing NNAPI compilation", nnapi_errno);
4674   nn_compilation_.reset(compilation);
4675 
4676   bool should_use_burst_mode = delegate_options.use_burst_computation;
4677   // Override should_use_burst_mode to true if the selected NNAPI devices are of
4678   // NNAPI feature level 5 to 7. Starting from NNAPI feature level 8, reusable
4679   // execution is preferred.
4680   if (!nnapi_devices_.empty() &&
4681       target_feature_level_ >= kNNAPIRuntimeFeatureLevel5 &&
4682       target_feature_level_ <= kNNAPIRuntimeFeatureLevel7) {
4683     should_use_burst_mode = true;
4684   }
4685   // Create burst object to be reused across a sequence of executions
4686   if (should_use_burst_mode &&
4687       nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
4688       nnapi_->ANeuralNetworksBurst_create) {
4689     ANeuralNetworksBurst* burst = nullptr;
4690     const int create_burst_result =
4691         nnapi_->ANeuralNetworksBurst_create(nn_compilation_.get(), &burst);
4692     if (create_burst_result != ANEURALNETWORKS_NO_ERROR) {
4693       nnapi_->ANeuralNetworksBurst_free(burst);
4694       burst = nullptr;
4695     }
4696     RETURN_TFLITE_ERROR_IF_NN_ERROR(context, create_burst_result,
4697                                     "creating NNAPI burst", nnapi_errno);
4698     nn_burst_.reset(burst);
4699   }
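  // When nn_burst_ is set, Invoke() dispatches through
  // ANeuralNetworksExecution_burstCompute() instead of the plain synchronous
  // ANeuralNetworksExecution_compute() path.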
4700 
4701   return kTfLiteOk;
4702 }
4703 
4704 TfLiteStatus NNAPIDelegateKernel::GetOperationsSupportedByTargetNnApiDevices(
4705     TfLiteContext* context, std::vector<int>* supported_nodes,
4706     int* nnapi_errno) {
4707   if (!nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices) {
4708     return kTfLiteError;
4709   }
4710 
4711   // Get the number of NNAPI operations mapped.
4712   NnapiMappingContext* mapping_context =
4713       reinterpret_cast<NnapiMappingContext*>(mapping_util_->context);
4714   const int nnapi_model_size =
4715       mapping_context->nnapi_to_tflite_op_mapping_.size();
4716 
4717   // Determine the list of operations the device actually supports
4718   std::unique_ptr<bool[]> nnapi_ops_support_flags(new bool[nnapi_model_size]);
4719 
4720   RETURN_TFLITE_ERROR_IF_NN_ERROR(
4721       context,
4722       nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices(
4723           nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
4724           nnapi_ops_support_flags.get()),
4725       "Checking supported operations for devices", nnapi_errno);
4726 
4727   // A TfLite op is supported only if all the associated NNAPI ones are.
4728   auto tflite_ops_support_status = std::map<int, bool>();
4729   std::for_each(nodes_.begin(), nodes_.end(),
4730                 [&tflite_ops_support_status](int tflite_node_index) {
4731                   tflite_ops_support_status[tflite_node_index] = true;
4732                 });
4733   for (int nnapi_op_index = 0; nnapi_op_index < nnapi_model_size;
4734        nnapi_op_index++) {
4735     const auto tflite_op_index =
4736         mapping_context->nnapi_to_tflite_op_mapping_[nnapi_op_index];
4737     tflite_ops_support_status[tflite_op_index] &=
4738         nnapi_ops_support_flags[nnapi_op_index];
4739     if (!tflite_ops_support_status[tflite_op_index]) {
4740       if (std::count(non_const_dequantize_output_to_node_mapping_.begin(),
4741                      non_const_dequantize_output_to_node_mapping_.end(), -1) <
4742               non_const_dequantize_output_to_node_mapping_.size() ||
4743           std::count(densify_output_to_node_mapping_.begin(),
4744                      densify_output_to_node_mapping_.end(),
4745                      -1) < densify_output_to_node_mapping_.size()) {
4746         // Only allow full model delegation for sparse model.
4747         return kTfLiteOk;
4748       }
4749     }
4750   }
4751 
4752   supported_nodes->clear();
4753   std::for_each(nodes_.begin(), nodes_.end(),
4754                 [&supported_nodes, &tflite_ops_support_status](int node_index) {
4755                   if (tflite_ops_support_status[node_index]) {
4756                     supported_nodes->push_back(node_index);
4757                   }
4758                 });
4759 
4760   return kTfLiteOk;
4761 }
4762 
4763 TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
4764                                          TfLiteNode* node, int* nnapi_errno) {
4765   const bool allow_padding =
4766       nnapi_->nnapi_runtime_feature_level > kMinSdkVersionForNNAPI13 &&
4767       nnapi_->ANeuralNetworksExecution_enableInputAndOutputPadding != nullptr;
4768   const auto delegate_options =
4769       StatefulNnApiDelegate::GetOptions(node->delegate);
4770 
4771   // Executions are not reusable before Android API 31.
4772   bool execution_is_reusable =
4773       nnapi_->nnapi_runtime_feature_level > kMinSdkVersionForNNAPI13 &&
4774       delegate_options.max_execution_cache_size > 0;
4775 
4776   // The output dynamic dimensions cannot be inferred when using custom ops.
4777   bool can_infer_output_shape = !delegate_options.allow_dynamic_dimensions ||
4778                                 delegate_options.vendor_plugin == nullptr;
4779 
4780   ANeuralNetworksExecution* execution = nullptr;
4781   NNAPIExecutionCache::Signature signature;
4782   if (execution_is_reusable) {
4783     signature = CreateExecutionCacheSignature(context, node, delegate_options,
4784                                               *tensor_memory_map_);
4785     execution = nn_execution_cache_.Get(signature);
4786   }
4787   bool should_create_new_execution = execution == nullptr;
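  // If a cached execution is reused, the ANeuralNetworksExecution_set* calls
  // below are skipped (the buffer bindings are already in place); only the
  // contents of the shared memory pools are refreshed for this invocation.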
4788 
4789   // Manages the lifetime of the new execution.
4790   UniqueExecution unique_execution(nullptr, NNFreeExecution(nnapi_));
4791   if (should_create_new_execution) {
4792     RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
4793                                     nnapi_->ANeuralNetworksExecution_create(
4794                                         nn_compilation_.get(), &execution),
4795                                     "creating NNAPI execution", nnapi_errno);
4796     unique_execution.reset(execution);
4797 
4798     if (nnapi_->nnapi_runtime_feature_level > kMinSdkVersionForNNAPI13) {
4799       RETURN_TFLITE_ERROR_IF_NN_ERROR(
4800           context,
4801           nnapi_->ANeuralNetworksExecution_setReusable(execution,
4802                                                        /*reusable=*/true),
4803           "making execution reusable", nnapi_errno);
4804     }
4805     if (delegate_options.vendor_execution_hints && vendor_plugin_) {
4806       TF_LITE_ENSURE_STATUS(vendor_plugin_->ConfigureExecutionHints(
4807           delegate_options.vendor_execution_hints, execution));
4808     }
4809 
4810     // Allow padding bytes for execution inputs & outputs if applicable.
4811     if (allow_padding) {
4812       RETURN_TFLITE_ERROR_IF_NN_ERROR(
4813           context,
4814           nnapi_->ANeuralNetworksExecution_enableInputAndOutputPadding(
4815               execution, /*enable=*/true),
4816           "setting allow padding for execution inputs and outputs",
4817           nnapi_errno);
4818     }
4819     // Set execution timeout if applicable.
4820     if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
4821       if (delegate_options.max_execution_timeout_duration_ns > 0) {
4822         RETURN_TFLITE_ERROR_IF_NN_ERROR(
4823             context,
4824             nnapi_->ANeuralNetworksExecution_setTimeout(
4825                 execution, delegate_options.max_execution_timeout_duration_ns),
4826             "setting execution timeout", nnapi_errno);
4827       }
4828       if (delegate_options.max_execution_loop_timeout_duration_ns > 0) {
4829         RETURN_TFLITE_ERROR_IF_NN_ERROR(
4830             context,
4831             nnapi_->ANeuralNetworksExecution_setLoopTimeout(
4832                 execution,
4833                 delegate_options.max_execution_loop_timeout_duration_ns),
4834             "setting execution loop timeout", nnapi_errno);
4835       }
4836     }
4837     // Check if the size of input and output memory pool needs to be resized.
4838     if (delegate_options.allow_dynamic_dimensions) {
4839       size_t total_input_byte_size = 0;
4840       // Compute the total byte size of TfLite inputs mapped to NNAPI inputs.
4841       for (int i : TfLiteIntArrayView(node->inputs)) {
4842         // Constant tensors are not NNAPI inputs.
4843         if (i != kTfLiteOptionalTensor &&
4844             context->tensors[i].allocation_type != kTfLiteMmapRo &&
4845             // The delegate might not have mapped this input (this can
4846             // happen if one tensor is split into several ones)
4847             mapping_util_->TfLiteIndexToNnIndex(mapping_util_.get(), i) != -1) {
4848           if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
4849             continue;
4850           }
4851           const TfLiteType nn_type_conversion =
4852               mapping_util_->TfLiteIndexToNnTypeConversion(mapping_util_.get(),
4853                                                            i);
4854           int tensor_size = 0;
4855           if (nn_type_conversion == kTfLiteNoType) {
4856             tensor_size = context->tensors[i].bytes;
4857           } else {
4858             size_t type_size;
4859             TF_LITE_ENSURE_OK(
4860                 context,
4861                 GetSizeOfType(context, nn_type_conversion, &type_size));
4862             tensor_size = NumElements(&context->tensors[i]) * type_size;
4863           }
4864           total_input_byte_size += tensor_size;
4865           total_input_byte_size += GetNumPaddingBytes(tensor_size);
4866         }
4867       }
4868       if (total_input_byte_size > nn_input_memory_->get_byte_size()) {
4869         nn_input_memory_ = std::make_unique<NNMemory>(nnapi_, "input_pool",
4870                                                       total_input_byte_size);
4871         // Reset all cached executions when the memory pool is recreated.
4872         nn_execution_cache_.Clear();
4873       }
4874 
4875       size_t total_output_byte_size = 0;
4876       for (int i : TfLiteIntArrayView(node->outputs)) {
4877         const auto& tensor = context->tensors[i];
4878         if (tensor.buffer_handle != kTfLiteNullBufferHandle) {
4879           continue;
4880         }
4881         size_t tensor_size = tensor.bytes;
4882         if (!can_infer_output_shape && HasUnspecifiedDimension(&tensor)) {
4883           if (tensor_max_size_hints_[i] == 0) {
4884             TF_LITE_KERNEL_LOG(context,
4885                                "Missing max tensor size for tensor#%d. When a "
4886                                "vendor plugin is supplied, max tensor size is "
4887                                "required for all dynamic output tensors.",
4888                                i);
4889             return kTfLiteError;
4890           }
4891           tensor_size = std::max(tensor_size, tensor_max_size_hints_[i]);
4892         }
4893         total_output_byte_size += tensor_size;
4894         total_output_byte_size += GetNumPaddingBytes(tensor_size);
4895       }
4896       if (total_output_byte_size > nn_output_memory_->get_byte_size()) {
4897         nn_output_memory_ = std::make_unique<NNMemory>(nnapi_, "output_pool",
4898                                                        total_output_byte_size);
4899         // Reset all cached executions when the memory pool is recreated.
4900         nn_execution_cache_.Clear();
4901       }
4902     }
4903 
4904     if (execution_is_reusable) {
4905       // The execution ownership is transferred to nn_execution_cache_.
4906       nn_execution_cache_.Put(signature, std::move(unique_execution));
4907       unique_execution = nullptr;
4908     }
4909   }
4910   // Set the input tensor buffers. Note: we access TfLite tensors using
4911   // absolute indices, but the NN API indexes inputs by relative indices.
4912   int relative_input_index = 0;
4913 
4914   const bool use_int8_asymm_signed =
4915       target_feature_level_ >= kMinSdkVersionForNNAPI13;
4916 
4917   size_t input_offset = 0;
4918   for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
4919     if (absolute_input_index == kTfLiteOptionalTensor) {
4920       continue;
4921     }
4922     ANeuralNetworksOperandType input_nn_operand_type;
4923     ANeuralNetworksOperandType* input_nn_operand_type_ptr = nullptr;
4924     TfLiteTensor* tensor = &context->tensors[absolute_input_index];
4925     TfLiteType ann_type_equivalent =
4926         mapping_util_->TfLiteIndexToNnTypeConversion(mapping_util_.get(),
4927                                                      absolute_input_index);
4928     if (delegate_options.allow_dynamic_dimensions &&
4929         ::tflite::HasUnspecifiedDimension(tensor)) {
4930       input_nn_operand_type = ConvertTensorTypeToNNType(
4931           tensor, ann_type_equivalent, use_int8_asymm_signed);
4932       input_nn_operand_type_ptr = &input_nn_operand_type;
4933     }
4934     if (tensor->allocation_type != kTfLiteMmapRo) {
4935       if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
4936           tensor->buffer_handle < tensor_memory_map_->size()) {
4937         if (should_create_new_execution) {
4938           RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4939               context,
4940               nnapi_->ANeuralNetworksExecution_setInputFromMemory(
4941                   execution, relative_input_index, input_nn_operand_type_ptr,
4942                   tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
4943                   tensor->bytes),
4944               "associating NNAPI execution input with a memory object", tensor,
4945               nnapi_errno);
4946         }
4947         relative_input_index++;
4948         continue;
4949       }
4950       int tensor_size = 0;
4951       int padding_bytes = 0;
4952       if (ann_type_equivalent != kTfLiteNoType) {
4953         const auto num_elements = NumElements(tensor);
4954         uint8_t* input_ptr = nn_input_memory_->get_data_ptr() + input_offset;
4955         if (tensor->type == kTfLiteUInt8 &&
4956             ann_type_equivalent == kTfLiteInt32) {
4957           for (int i = 0; i < num_elements; ++i) {
4958             reinterpret_cast<int32_t*>(input_ptr)[i] =
4959                 static_cast<const int32_t>(tensor->data.uint8[i]);
4960           }
4961         } else if (tensor->type == kTfLiteInt8 &&
4962                    ann_type_equivalent == kTfLiteUInt8) {
4963           // Explicitly convert int8 values to uint8 values.
4964           for (int i = 0; i < num_elements; ++i) {
4965             input_ptr[i] = static_cast<const uint8_t>(
4966                 static_cast<int32_t>(tensor->data.int8[i]) + 128);
4967           }
4968         } else if (tensor->type == kTfLiteInt8 &&
4969                    ann_type_equivalent == kTfLiteInt32) {
4970           if (use_int8_asymm_signed) {
4971             for (int i = 0; i < num_elements; ++i) {
4972               reinterpret_cast<int32_t*>(input_ptr)[i] =
4973                   static_cast<const int32_t>(tensor->data.int8[i]);
4974             }
4975           } else {
4976             for (int i = 0; i < num_elements; ++i) {
4977               reinterpret_cast<int32_t*>(input_ptr)[i] =
4978                   static_cast<const int32_t>(tensor->data.int8[i]) + 128;
4979             }
4980           }
4981         } else if (tensor->type == kTfLiteInt64 &&
4982                    ann_type_equivalent == kTfLiteInt32) {
4983           // Check that the int64 source values fit into int32.
4984           int32_t* input_ptr_i32 = reinterpret_cast<int32_t*>(input_ptr);
4985           for (int i = 0; i < num_elements; ++i) {
4986             if (tensor->data.i64[i] < std::numeric_limits<int32_t>::min() ||
4987                 tensor->data.i64[i] > std::numeric_limits<int32_t>::max()) {
4988               TF_LITE_KERNEL_LOG(context,
4989                                  "NN API Delegate: int64 value out of bounds "
4990                                  "for int32 target NNAPI tensor\n");
4991               return kTfLiteError;
4992             }
4993             input_ptr_i32[i] = static_cast<int32_t>(tensor->data.i64[i]);
4994           }
4995         } else {
4996           TF_LITE_KERNEL_LOG(
4997               context,
4998               "NN API Delegate: unsupported tensor types conversion: "
4999               "from type code %d to type code %d.\n",
5000               tensor->type, ann_type_equivalent);
5001           return kTfLiteError;
5002         }
5003         size_t type_size;
5004         TF_LITE_ENSURE_OK(
5005             context, GetSizeOfType(context, ann_type_equivalent, &type_size));
5006         tensor_size = NumElements(tensor) * type_size;
5007         padding_bytes = GetNumPaddingBytes(tensor_size);
5008         if (should_create_new_execution) {
5009           RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
5010               context,
5011               nnapi_->ANeuralNetworksExecution_setInputFromMemory(
5012                   execution, relative_input_index, input_nn_operand_type_ptr,
5013                   nn_input_memory_->get_handle(), input_offset,
5014                   GetNNTensorSize(tensor_size, allow_padding)),
5015               "associating NNAPI execution input with a memory object", tensor,
5016               nnapi_errno);
5017         }
5018       } else if (mapping_util_->TfLiteIndexToNnIndex(
5019                      mapping_util_.get(), absolute_input_index) != -1) {
5020         // copy data to pre-allocated shared memory.
5021         memcpy(nn_input_memory_->get_data_ptr() + input_offset,
5022                tensor->data.raw, tensor->bytes);
5023         tensor_size = tensor->bytes;
5024         padding_bytes = GetNumPaddingBytes(tensor_size);
5025         if (should_create_new_execution) {
5026           RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
5027               context,
5028               nnapi_->ANeuralNetworksExecution_setInputFromMemory(
5029                   execution, relative_input_index, input_nn_operand_type_ptr,
5030                   nn_input_memory_->get_handle(), input_offset,
5031                   GetNNTensorSize(tensor_size, allow_padding)),
5032               "associating NNAPI execution input with a memory object", tensor,
5033               nnapi_errno);
5034         }
5035       }
5036       input_offset += tensor_size + padding_bytes;
5037       relative_input_index++;
5038     }
5039   }
5040 
5041   // Set the output tensor buffers.
5042   int relative_output_index = 0;
5043   size_t output_offset = 0;
5044   for (auto output_index : TfLiteIntArrayView(node->outputs)) {
5045     // If the NNAPI implementation doesn't produce some of the outputs,
5046     // they are left unmapped and we should not try to read their values here.
5047     if (mapping_util_->TfLiteIndexToNnIndex(mapping_util_.get(),
5048                                             output_index) == -1) {
5049       continue;
5050     }
5051     ANeuralNetworksOperandType output_nn_operand_type;
5052     ANeuralNetworksOperandType* output_nn_operand_type_ptr = nullptr;
5053     TfLiteTensor* tensor = &context->tensors[output_index];
5054     if (delegate_options.allow_dynamic_dimensions && can_infer_output_shape &&
5055         ::tflite::HasUnspecifiedDimension(tensor)) {
5056       TfLiteType ann_type_equivalent =
5057           mapping_util_->TfLiteIndexToNnTypeConversion(mapping_util_.get(),
5058                                                        output_index);
5059       output_nn_operand_type = ConvertTensorTypeToNNType(
5060           tensor, ann_type_equivalent, use_int8_asymm_signed);
5061       output_nn_operand_type_ptr = &output_nn_operand_type;
5062     }
5063     if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
5064         tensor->buffer_handle < tensor_memory_map_->size() &&
5065         should_create_new_execution) {
5066       RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
5067           context,
5068           nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
5069               execution, relative_output_index, output_nn_operand_type_ptr,
5070               tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
5071               tensor->bytes),
5072           "associating NNAPI execution output to a memory object", tensor,
5073           nnapi_errno);
5074 
5075     } else {
5076       size_t tensor_size = tensor->bytes;
5077       if (!can_infer_output_shape && HasUnspecifiedDimension(tensor)) {
5078         tensor_size =
5079             std::max(tensor->bytes, tensor_max_size_hints_[output_index]);
5080       }
5081       int padding_bytes = GetNumPaddingBytes(tensor_size);
5082       if (should_create_new_execution) {
5083         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
5084             context,
5085             nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
5086                 execution, relative_output_index, output_nn_operand_type_ptr,
5087                 nn_output_memory_->get_handle(), output_offset,
5088                 GetNNTensorSize(tensor_size, allow_padding)),
5089             "associating NNAPI execution output to a memory object", tensor,
5090             nnapi_errno);
5091       }
5092       output_offset += tensor_size + padding_bytes;
5093     }
5094     relative_output_index++;
5095   }
5096 
5097   // Set memory for NNAPI state_outputs.
5098   for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
5099     int state_tensor_idx = model_state_tfl_inputs_[i];
5100     TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
5101     int padding_bytes = GetNumPaddingBytes(tensor->bytes);
5102     if (should_create_new_execution) {
5103       RETURN_TFLITE_ERROR_IF_NN_ERROR(
5104           context,
5105           nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
5106               execution, relative_output_index, nullptr,
5107               nn_output_memory_->get_handle(), output_offset,
5108               GetNNTensorSize(tensor->bytes, allow_padding)),
5109           "associating NNAPI execution state output to a memory object",
5110           nnapi_errno);
5111     }
5112     output_offset += tensor->bytes + padding_bytes;
5113     relative_output_index++;
5114   }
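  // State outputs are appended after the regular outputs in the same output
  // pool; after the computation completes they are copied back into the
  // corresponding TfLite state input tensors.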
5115 
5116   // Invoke ANN in blocking fashion.
5117   if (nnapi_->android_sdk_version < kMinSdkVersionForNNAPI12) {
5118     ANeuralNetworksEvent* event = nullptr;
5119     RETURN_TFLITE_ERROR_IF_NN_ERROR(
5120         context,
5121         nnapi_->ANeuralNetworksExecution_startCompute(execution, &event),
5122         "starting async computation", nnapi_errno);
5123     const int wait_result = nnapi_->ANeuralNetworksEvent_wait(event);
5124     nnapi_->ANeuralNetworksEvent_free(event);
5125     RETURN_TFLITE_ERROR_IF_NN_ERROR(context, wait_result,
5126                                     "waiting for async computation completion",
5127                                     nnapi_errno);
5128   } else {
5129     // Use Burst mode by default for NNAPI 1.2+.
5130     if (nn_burst_) {
5131       RETURN_TFLITE_ERROR_IF_NN_ERROR(
5132           context,
5133           nnapi_->ANeuralNetworksExecution_burstCompute(execution,
5134                                                         nn_burst_.get()),
5135           "running burst computation", nnapi_errno);
5136     } else {
5137       // Use synchronous execution for NNAPI 1.2+ as a fallback.
5138       RETURN_TFLITE_ERROR_IF_NN_ERROR(
5139           context, nnapi_->ANeuralNetworksExecution_compute(execution),
5140           "running computation", nnapi_errno);
5141     }
5142   }
5143 
5144   // resize dynamic output tensors
5145   if (!can_infer_output_shape) {
5146     relative_output_index = 0;
5147     for (auto output_index : TfLiteIntArrayView(node->outputs)) {
5148       TfLiteTensor* tensor = &context->tensors[output_index];
5149       if (HasUnspecifiedDimension(tensor)) {
5150         auto* new_dims = TfLiteIntArrayCreate(tensor->dims->size);
5151         RETURN_TFLITE_ERROR_IF_NN_ERROR(
5152             context,
5153             nnapi_->ANeuralNetworksExecution_getOutputOperandDimensions(
5154                 execution, relative_output_index,
5155                 reinterpret_cast<uint32_t*>(new_dims->data)),
5156             "get output operand dimensions", nnapi_errno);
5157         TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, tensor, new_dims));
5158       }
5159       relative_output_index++;
5160     }
5161   }
5162 
5163   // copy results from shared memory to the destination.
5164   output_offset = 0;
5165   for (auto output_index : TfLiteIntArrayView(node->outputs)) {
5166     TfLiteTensor* tensor = &context->tensors[output_index];
5167     if (tensor->buffer_handle != kTfLiteNullBufferHandle) {
5168       continue;
5169     }
5170     TfLiteType ann_type_equivalent =
5171         mapping_util_->TfLiteIndexToNnTypeConversion(mapping_util_.get(),
5172                                                      output_index);
5173     if (tensor->type == kTfLiteInt8 && ann_type_equivalent == kTfLiteUInt8) {
5174       // Explicitly convert uint8 values to int8 values.
5175       uint8_t* output_ptr = reinterpret_cast<uint8_t*>(
5176           nn_output_memory_->get_data_ptr() + output_offset);
5177       const auto num_elements = NumElements(tensor);
5178       for (int i = 0; i < num_elements; ++i) {
5179         output_ptr[i] =
5180             static_cast<uint8_t>(static_cast<int32_t>(output_ptr[i]) - 128);
5181       }
5182     }
5183     memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset,
5184            tensor->bytes);
5185     size_t tensor_size = tensor->bytes;
5186     if (!can_infer_output_shape && HasUnspecifiedDimension(tensor)) {
5187       tensor_size =
5188           std::max(tensor->bytes, tensor_max_size_hints_[output_index]);
5189     }
5190     output_offset += tensor_size;
5191     output_offset += GetNumPaddingBytes(tensor_size);
5192   }
5193   // The state_out of the previous invocation needs to be copied to the
5194   // state_in of the current invocation.
5195   for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
5196     int state_tensor_idx = model_state_tfl_inputs_[i];
5197     TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
5198     memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset,
5199            tensor->bytes);
5200     output_offset += tensor->bytes;
5201     output_offset += GetNumPaddingBytes(tensor->bytes);
5202   }
5203 
5204   // Copy the output of all output tensors in feedback_loops_ into the
5205   // associated input.
5206   for (auto feedback_loop : feedback_loops_) {
5207     int output_tensor_idx;
5208     int input_tensor_idx;
5209     std::tie(output_tensor_idx, input_tensor_idx) = feedback_loop;
5210     TfLiteTensor& src = context->tensors[output_tensor_idx];
5211     TfLiteTensor& dest = context->tensors[input_tensor_idx];
5212 
5213     memcpy(dest.data.raw, src.data.raw, src.bytes);
5214   }
5215 
5216   return kTfLiteOk;
5217 }
5218 
5219 void NNAPIDelegateKernel::AddDequantizeOperatorsWhereNeeded(
5220     const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
5221     int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno) {
5222   // Depending on the operator and the input data format, Dequantize
5223   // operators may need to be added. For example, when the input is
5224   // floating-point but the weights are quantized, the weights will first be
5225   // dequantized to the same format as the input before being passed to the
5226   // operator.
5227 
5228   // The tensor determining whether the inputs should be floating-point.
5229   int input_tensor_index = -1;
5230   std::vector<int> inputs_to_potentially_dequantize;
5231 
5232   switch (builtin_code) {
5233     case kTfLiteBuiltinConv2d:
5234     case kTfLiteBuiltinFullyConnected: {
5235       input_tensor_index = 0;
5236       // Weights and bias are inputs #1 and #2 respectively and may require
5237       // dequantization.
5238       inputs_to_potentially_dequantize = {1, 2};
5239       break;
5240     }
5241     case kTfLiteBuiltinLstm: {
5242       input_tensor_index = 0;
5243       inputs_to_potentially_dequantize = {1,  2,  3,  4,  5,  6,  7,
5244                                           8,  9,  10, 11, 12, 13, 14,
5245                                           15, 16, 17, 20, 21, 22, 23};
5246       break;
5247     }
5248     default:
5249       return;
5250   }
5251 
5252   int tensor_id = node->inputs->data[input_tensor_index];
5253   if (tensor_id < 0) return;
5254 
5255   // Nothing to do if the input is not floating-point.
5256   if (!IsFloat(context->tensors[tensor_id].type)) return;
5257 
5258   for (int i : inputs_to_potentially_dequantize) {
5259     if (i < 0 || i >= node->inputs->size) continue;  // Ignore invalid index.
5260     tensor_id = node->inputs->data[i];
5261     if (tensor_id < 0) continue;  // Ignore optional input.
5262 
5263     const TfLiteType type = context->tensors[tensor_id].type;
5264     // Nothing to do for this tensor if it's not quantized.
5265     if (!IsQuantized(type)) continue;
5266 
5267     // Insert Dequantize operator if it hasn't been done already and change
5268     // the node's input accordingly.
5269     builder->AddDequantize(i, node->inputs->data[i], type, tflite_node_index);
5270   }
5271 }
5272 
5273 TfLiteStatus NNAPIDelegateKernel::DensifyAndDequantizeConstTensor(
5274     TfLiteContext* context, int densify_node_id, bool should_dequantize,
5275     NNAPIOpBuilder& builder) {
5276   TfLiteNode* densify_node;
5277   TfLiteRegistration* reg;
5278   TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
5279       context, densify_node_id, &densify_node, &reg));
5280   int sparse_weight_tid = densify_node->inputs->data[0];
5281   auto input_tensor = context->tensors[sparse_weight_tid];
5282   auto output_tensor = context->tensors[densify_node->outputs->data[0]];
5283   if (input_tensor.sparsity == nullptr) {
5284     return kTfLiteError;
5285   }
5286   const int dims_count = output_tensor.dims->size;
5287   std::vector<int> vector_shape(dims_count);
5288   for (int i = 0; i < dims_count; i++) {
5289     vector_shape[i] = output_tensor.dims->data[i];
5290   }
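  // The sparsity FormatConverter below expands the sparse constant into a
  // dense buffer, which is then added as a new NNAPI constant input operand
  // (optionally dequantized from fp16 to fp32 first).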
5291   size_t dense_size;
5292   int new_tensor_index = -1;
5293   switch (input_tensor.type) {
5294     case kTfLiteFloat32: {
5295       dense_size = output_tensor.bytes / sizeof(float);
5296       std::vector<float> output_data(dense_size);
5297       tflite::internal::sparsity::FormatConverter<float> converter(
5298           vector_shape, *input_tensor.sparsity);
5299       converter.SparseToDense(static_cast<const float*>(input_tensor.data.data),
5300                               dense_size, output_data.data(), context);
5301       TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<float>(
5302           ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, output_tensor.dims,
5303           output_data, output_tensor.params, &new_tensor_index));
5304       break;
5305     }
5306     case kTfLiteFloat16: {
5307       dense_size = output_tensor.bytes / sizeof(Eigen::half);
5308       std::vector<uint16_t> output_data(dense_size);
5309       Eigen::half* unpacked_fp16_data =
5310           reinterpret_cast<Eigen::half*>(output_data.data());
5311       tflite::internal::sparsity::FormatConverter<Eigen::half> converter(
5312           vector_shape, *input_tensor.sparsity);
5313       converter.SparseToDense(
5314           static_cast<const Eigen::half*>(input_tensor.data.data), dense_size,
5315           unpacked_fp16_data, context);
5316       if (should_dequantize) {
5317         // we need to dequantize the fp16 dense tensor
5318         std::vector<float> float_dense_data(dense_size);
5319         for (int i = 0; i < dense_size; ++i) {
5320           float_dense_data[i] = fp16_ieee_to_fp32_value(
5321               reinterpret_cast<uint16_t*>(output_data.data())[i]);
5322         }
5323         TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<float>(
5324             ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, output_tensor.dims,
5325             float_dense_data, output_tensor.params, &new_tensor_index));
5326       } else {
5327         TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<uint16_t>(
5328             ANEURALNETWORKS_TENSOR_FLOAT16, kTfLiteFloat16, output_tensor.dims,
5329             output_data, output_tensor.params, &new_tensor_index));
5330       }
5331       break;
5332     }
5333     case kTfLiteInt8: {
5334       dense_size = output_tensor.bytes / sizeof(int8_t);
5335       std::vector<int8_t> output_data(dense_size);
5336       tflite::internal::sparsity::FormatConverter<int8_t> converter(
5337           vector_shape, *input_tensor.sparsity);
5338       converter.SparseToDense(
5339           static_cast<const int8_t*>(input_tensor.data.data), dense_size,
5340           output_data.data(), context);
5341       TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<int8_t>(
5342           ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, kTfLiteInt8,
5343           output_tensor.dims, output_data, output_tensor.params,
5344           &new_tensor_index));
5345       break;
5346     }
5347     default: {
5348       return kTfLiteError;
5349     }
5350   }
5351   return kTfLiteOk;
5352 }
5353 
5354 TfLiteIntArray* ResizeTfLiteIntArray(TfLiteIntArray* old_array, int new_size,
5355                                      int init_value) {
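  // Takes ownership of old_array (it is always freed before returning),
  // copies the overlapping prefix into the new array, and fills any remaining
  // entries with init_value.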
5356   TfLiteIntArray* ret = TfLiteIntArrayCreate(new_size);
5357   if (ret) {
5358     int size_to_copy = 0;
5359     if (old_array) {
5360       size_to_copy = new_size > old_array->size ? old_array->size : new_size;
5361       memcpy(ret->data, old_array->data, size_to_copy * sizeof(int));
5362     }
5363     for (int i = size_to_copy; i < ret->size; i++) {
5364       ret->data[i] = init_value;
5365     }
5366   }
5367   TfLiteIntArrayFree(old_array);
5368   return ret;
5369 }
5370 
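// Custom deleter for NnapiMappingUtilCInterface: deletes the owned
// NnapiMappingContext and then free()s the struct itself, matching the
// malloc() in NNAPIDelegateKernel::NnapiMappingUtilCInterfaceCreate() below.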
5371 void NNFreeMappingUtil::operator()(NnapiMappingUtilCInterface* mapping_util) {
5372   NnapiMappingContext* mapping_context =
5373       reinterpret_cast<NnapiMappingContext*>(mapping_util->context);
5374   delete (mapping_context);
5375   mapping_util->context = nullptr;
5376   free(mapping_util);
5377 }
5378 
5379 class NnapiMappingUtilCInterfaceImpl {
5380  public:
5381   static int TfLiteIndexToNnIndex(NnapiMappingUtilCInterface* mapping,
5382                                   int index) {
5383     NnapiMappingContext* mapping_context =
5384         reinterpret_cast<NnapiMappingContext*>(mapping->context);
5385     const size_t max_size = mapping_context->lite_tensor_to_ann_tensor_.size();
5386     if (index >= 0 && index < max_size)
5387       return mapping_context->lite_tensor_to_ann_tensor_[index];
5388     else
5389       return -1;
5390   }
5391 
5392   static int AddNewNonTensorOperand(NnapiMappingUtilCInterface* mapping) {
5393     NnapiMappingContext* mapping_context =
5394         reinterpret_cast<NnapiMappingContext*>(mapping->context);
5395     return mapping_context->next_ann_tensor_index_++;
5396   }
5397 
5398   static int AddDelegateGeneratedInputAnnTensorOperand(
5399       NnapiMappingUtilCInterface* mapping) {
5400     NnapiMappingContext* mapping_context =
5401         reinterpret_cast<NnapiMappingContext*>(mapping->context);
5402     return mapping_context->next_ann_tensor_index_++;
5403   }
5404 
5405   static int AddNewNnTensorIndex(NnapiMappingUtilCInterface* mapping,
5406                                  int tflite_index) {
5407     NnapiMappingContext* mapping_context =
5408         reinterpret_cast<NnapiMappingContext*>(mapping->context);
5409     const size_t current_size =
5410         mapping_context->lite_tensor_to_ann_tensor_.size();
5411     if (tflite_index >= current_size) {
5412       mapping_context->lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
5413     }
5414     const int new_tensor_index = mapping_context->next_ann_tensor_index_++;
5415     mapping_context->lite_tensor_to_ann_tensor_[tflite_index] =
5416         new_tensor_index;
5417     return new_tensor_index;
5418   }
5419 
5420   static TfLiteType TfLiteIndexToNnTypeConversion(
5421       NnapiMappingUtilCInterface* mapping, int index) {
5422     NnapiMappingContext* mapping_context =
5423         reinterpret_cast<NnapiMappingContext*>(mapping->context);
5424     const size_t max_size = mapping_context->index_to_type_conversion_.size();
5425     if (index >= 0 && index < max_size)
5426       return static_cast<TfLiteType>(
5427           mapping_context->index_to_type_conversion_[index]);
5428     else
5429       return kTfLiteNoType;
5430   }
5431 
5432   static void AddTypeConversion(NnapiMappingUtilCInterface* mapping,
5433                                 int tflite_index, TfLiteType tflite_type) {
5434     NnapiMappingContext* mapping_context =
5435         reinterpret_cast<NnapiMappingContext*>(mapping->context);
5436     const size_t current_size =
5437         mapping_context->index_to_type_conversion_.size();
5438     if (tflite_index >= current_size) {
5439       mapping_context->index_to_type_conversion_.resize(tflite_index + 1,
5440                                                         kTfLiteNoType);
5441     }
5442     mapping_context->index_to_type_conversion_[tflite_index] = tflite_type;
5443   }
5444 
5445   static void AddNnapiToTfliteOpMapping(NnapiMappingUtilCInterface* mapping,
5446                                         int tflite_node_index) {
5447     NnapiMappingContext* mapping_context =
5448         reinterpret_cast<NnapiMappingContext*>(mapping->context);
5449     mapping_context->nnapi_to_tflite_op_mapping_.push_back(tflite_node_index);
5450   }
5451 };
5452 
5453 NnapiMappingUtilCInterface*
5454 NNAPIDelegateKernel::NnapiMappingUtilCInterfaceCreate() {
5455   NnapiMappingUtilCInterface* mapping =
5456       static_cast<NnapiMappingUtilCInterface*>(
5457           malloc(sizeof(NnapiMappingUtilCInterface)));
5458   mapping->context = new NnapiMappingContext();
5459   mapping->TfLiteIndexToNnIndex =
5460       NnapiMappingUtilCInterfaceImpl::TfLiteIndexToNnIndex;
5461   mapping->AddNewNonTensorOperand =
5462       NnapiMappingUtilCInterfaceImpl::AddNewNonTensorOperand;
5463   mapping->AddDelegateGeneratedInputAnnTensorOperand =
5464       NnapiMappingUtilCInterfaceImpl::AddDelegateGeneratedInputAnnTensorOperand;
5465   mapping->AddNewNnTensorIndex =
5466       NnapiMappingUtilCInterfaceImpl::AddNewNnTensorIndex;
5467   mapping->TfLiteIndexToNnTypeConversion =
5468       NnapiMappingUtilCInterfaceImpl::TfLiteIndexToNnTypeConversion;
5469   mapping->AddTypeConversion =
5470       NnapiMappingUtilCInterfaceImpl::AddTypeConversion;
5471   mapping->AddNnapiToTfliteOpMapping =
5472       NnapiMappingUtilCInterfaceImpl::AddNnapiToTfliteOpMapping;
5473   return mapping;
5474 }
5475 
5476 TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
5477     TfLiteContext* context, int* nnapi_errno, bool allow_dynamic_dimensions) {
5478   DequantizeMapping dequantize_mapping;
5479   // The operand builder allows creating a single op. It is created outside
5480   // the for loop to avoid reallocating the vectors.
5481   NNAPIOpBuilder builder(nnapi_, context, mapping_util_.get(),
5482                          &dequantize_mapping, &allocation_memory_mapping_,
5483                          nn_model_.get(), nnapi_errno,
5484                          allow_dynamic_dimensions);
5485   // If we have target accelerators, the target SDK version might be
5486   // different from the current Android version.
5487   target_feature_level_ = nnapi_->nnapi_runtime_feature_level;
5488   if (!nnapi_devices_.empty()) {
5489     TF_LITE_ENSURE_STATUS(GetTargetFeatureLevel(
5490         context, nnapi_, nnapi_devices_, &target_feature_level_, nnapi_errno));
5491   }
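  // target_feature_level_ drives the decisions below: whether signed int8
  // asymmetric tensors can be used, whether PACK must be lowered into
  // CONCAT + RESHAPE, and whether the fully quantized LSTM path is taken.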
5492   // First pass: handle const fp16->fp32 dequantize and densify if needed.
5493   for (auto node_index : nodes_) {
5494     TfLiteNode* node = nullptr;
5495     TfLiteRegistration* registration = nullptr;
5496     TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
5497         context, node_index, &node, &registration));
5498     if (IsDequantizeConstFloat16(context, node, registration)) {
5499       builder.AddTensorInput(node->inputs->data[0], /*hybrid_op=*/false,
5500                              NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION |
5501                                  NN_TENSOR_FLAG_SCALAR_AS_TENSOR);
5502     }
5503     if (IsDensifyConstTensor(context, node, registration)) {
5504       densify_output_to_node_mapping_[node->outputs->data[0]] = node_index;
5505     }
5506     if (IsDequantizeNonConstFloat16(context, node, registration)) {
5507       non_const_dequantize_output_to_node_mapping_[node->outputs->data[0]] =
5508           node_index;
5509     }
5510   }
5511   // Clear the input and output lists for the dequantize path.
5512   builder.ClearInputOuputLists();
5513 
5514   // Add other tensors.
5515   for (auto node_index : nodes_) {
5516     // Obtain the op and registration.
5517     TfLiteNode* node;
5518     TfLiteRegistration* reg;
5519     TF_LITE_ENSURE_STATUS(
5520         context->GetNodeAndRegistration(context, node_index, &node, &reg));
5521     // skip DENSIFY -> DEQUANTIZE as they are handled elsewhere.
5522     if (IsDensifyConstTensor(context, node, reg) ||
5523         IsDequantizeNonConstFloat16(context, node, reg)) {
5524       continue;
5525     }
5526 
5527     // Use vendor plugin to map the node if needed.
5528     if (vendor_plugin_ && vendor_plugin_->ValidateNode(context, reg, node)) {
5529       TF_LITE_ENSURE_STATUS(vendor_plugin_->MapNode(
5530           context, node, node_index, mapping_util_.get(), nn_model_.get()));
5531       continue;
5532     }
5533     // Delegate PACK by lowering it into CONCAT + RESHAPE.
5534     if (reg->builtin_code == kTfLiteBuiltinPack &&
5535         target_feature_level_ < kNNAPIRuntimeFeatureLevel6) {
5536       TF_LITE_ENSURE_STATUS(
5537           builder.TransformPackIntoSupportedOps(node_index, node, reg));
5538       continue;
5539     }
5540     // Delegate UNPACK by lowering it into RESHAPE + SPLIT.
5541     if (reg->builtin_code == kTfLiteBuiltinUnpack) {
5542       TF_LITE_ENSURE_STATUS(
5543           builder.TransformUnpackIntoSupportedOps(node_index, node, reg));
5544       continue;
5545     }
5546     // Delegate SPLIT_V by lowering it into SLICEs.
5547     if (reg->builtin_code == kTfLiteBuiltinSplitV) {
5548       TF_LITE_ENSURE_STATUS(
5549           builder.TransformSplitVIntoSupportedOps(node_index, node, reg));
5550       continue;
5551     }
5552     // Delegate SQUARED_DIFFERENCE by lowering it into SUB + MUL.
5553     if (reg->builtin_code == kTfLiteBuiltinSquaredDifference) {
5554       TF_LITE_ENSURE_STATUS(builder.TransformSquaredDifferenceIntoSupportedOps(
5555           node_index, node, reg));
5556       continue;
5557     }
5558     // Fully quantized full LSTM.
5559     if (target_feature_level_ >= kMinSdkVersionForNNAPI13 &&
5560         reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
5561         context->tensors[node->inputs->data[0]].type == kTfLiteInt8) {
5562       const auto quant8_full_lstm_op_code = ANEURALNETWORKS_QUANTIZED_LSTM;
5563 
5564       constexpr int kInputTensor = 0;
5565       constexpr int kInputToInputWeightsTensor = 1;
5566       constexpr int kRecurrentToInputWeightsTensor = 5;
5567       constexpr int kInputGateBiasTensor = 12;
5568       constexpr int kForgetGateBiasTensor = 13;
5569       constexpr int kCellGateBiasTensor = 14;
5570       constexpr int kOutputGateBiasTensor = 15;
5571       constexpr int kProjectionWeightsTensor = 16;
5572       constexpr int kProjectionBiasTensor = 17;
5573       constexpr int kPrevOutputTensor = 18;
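      // These positions follow the tensor layout of the TFLite full LSTM
      // kernel; optional tensors at these positions are given explicit
      // zero-length NNAPI operands below so the operand count stays fixed.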
5574 
5575       // Add input tensors.
5576       for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
5577         const auto input_index = node->inputs->data[input_pos];
5578         if (input_index == kTfLiteOptionalTensor) {
5579           if (input_pos == kInputToInputWeightsTensor ||
5580               input_pos == kRecurrentToInputWeightsTensor ||
5581               input_pos == kProjectionWeightsTensor) {
5582             TF_LITE_ENSURE_STATUS(builder.AddVectorInt8Operand(nullptr, 0));
5583           } else if (input_pos == kInputGateBiasTensor ||
5584                      input_pos == kForgetGateBiasTensor ||
5585                      input_pos == kCellGateBiasTensor ||
5586                      input_pos == kOutputGateBiasTensor ||
5587                      input_pos == kProjectionBiasTensor) {
5588             TF_LITE_ENSURE_STATUS(builder.AddVectorInt32Operand(nullptr, 0));
5589           } else {  // cell-to-* and layer norm weights.
5590             TF_LITE_ENSURE_STATUS(builder.AddVectorInt16Operand(nullptr, 0));
5591           }
5592         } else {
5593           // Only input and previous output use INT8_ASYM_SIGNED.
5594           int flags =
5595               (input_pos == kInputTensor || input_pos == kPrevOutputTensor)
5596                   ? NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED
5597                   : 0;
5598           TF_LITE_ENSURE_STATUS(
5599               builder.AddTensorInput(input_index, /*hybrid_op=*/false, flags));
5600         }
5601       }
5602 
5603       // Add clip parameters.
5604       auto builtin = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
5605       TF_LITE_ENSURE_STATUS(
5606           builder.AddScalarFloat32Operand(builtin->cell_clip));
5607       TF_LITE_ENSURE_STATUS(
5608           builder.AddScalarFloat32Operand(builtin->proj_clip));
5609 
5610       // Add quantization parameters for intermediate tensors.
5611       TF_LITE_ENSURE_EQ(context, node->intermediates->size, 5);
5612       for (int intermediate_pos = 0;
5613            intermediate_pos < node->intermediates->size; ++intermediate_pos) {
5614         const auto intermediate_index =
5615             node->intermediates->data[intermediate_pos];
5616         const TfLiteTensor& tensor = context->tensors[intermediate_index];
5617         TfLiteAffineQuantization* quantization_params =
5618             static_cast<TfLiteAffineQuantization*>(tensor.quantization.params);
5619         if (intermediate_pos == 4) {
5620           TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
5621               quantization_params->zero_point->data[0]));
5622         }
5623         TF_LITE_ENSURE_STATUS(builder.AddScalarFloat32Operand(
5624             quantization_params->scale->data[0]));
5625       }
5626 
5627       // Activation state output.
5628       int ann_index;
5629       builder.AddStateInt8AsymTensor(
5630           node->inputs->data[/*kInputActivationStateTensor*/ 18], &ann_index);
5631       model_state_outputs_.push_back(ann_index);
5632       model_state_tfl_inputs_.push_back(
5633           node->inputs->data[/*kInputActivationStateTensor*/ 18]);
5634 
5635       // Cell state output.
5636       builder.AddStateInt16Tensor(
5637           node->inputs->data[/*kInputCellStateTensor*/ 19], &ann_index);
5638       model_state_outputs_.push_back(ann_index);
5639       model_state_tfl_inputs_.push_back(
5640           node->inputs->data[/*kInputCellStateTensor*/ 19]);
5641 
5642       // Add output tensors.
5643       for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
5644         const auto output_index = node->outputs->data[output_pos];
5645         TF_LITE_ENSURE_STATUS(builder.AddTensorOutput(
5646             output_index, NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
5647       }
5648 
5649       builder.FinalizeAddOperation(quant8_full_lstm_op_code, node_index);
5650       continue;
5651     }
5652 
5653     const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
5654     const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code);
5655     const bool need_int8_conversion =
5656         target_feature_level_ < kMinSdkVersionForNNAPI13 &&
5657         NeedInt8Conversion(context, reg->builtin_code, node);
5658     const bool use_int8_asymm_signed =
5659         target_feature_level_ >= kMinSdkVersionForNNAPI13 && !hybrid_op;
5660 
5661     // skip DEQUANTIZE (fp16 -> fp32) as it is handled elsewhere
5662     if (IsDequantizeConstFloat16(context, node, reg)) {
5663       continue;
5664     }
5665 
5666     int input_tensor_flags = 0;
5667     if (scalar_as_tensor) {
5668       input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
5669     }
5670     if (use_int8_asymm_signed) {
5671       input_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
5672     }
5673 
5674     // On SDK level less than 30, h_swish will be lowered into supported NNAPI
5675     // operations. Since SDK level 30, h_swish is supported as a single
5676     // operation.
5677     if (reg->builtin_code == kTfLiteBuiltinHardSwish &&
5678         nnapi_->android_sdk_version < kMinSdkVersionForNNAPI13) {
5679       builder.TransformHardSwishIntoSupportedOps(
5680           node->inputs->data[0], node->outputs->data[0], need_int8_conversion,
5681           node_index);
5682       continue;
5683     }
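    // Illustrative note (not in the original source): assuming the usual
    // definition of the op, the lowering performed by
    // TransformHardSwishIntoSupportedOps corresponds to
    //
    //   h_swish(x) = x * relu6(x + 3) / 6
    //
    // expressed with elementwise NNAPI operations instead of a single
    // HARD_SWISH op.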
5684     // For PACK, NNAPI expects the axis scalar before all input tensors.
5685     if (reg->builtin_code == kTfLiteBuiltinPack) {
5686       const auto* builtin =
5687           reinterpret_cast<TfLitePackParams*>(node->builtin_data);
5688       // NNAPI only accepts non-negative axis.
5689       auto& input_tensor = context->tensors[node->inputs->data[0]];
5690       int axis = builtin->axis < 0 ? input_tensor.dims->size + builtin->axis + 1
5691                                    : builtin->axis;
5692       TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(axis));
5693     }
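    // Worked example (illustrative): when packing rank-3 inputs
    // (input_tensor.dims->size == 3) with builtin->axis == -1, the remapped
    // axis is 3 + (-1) + 1 == 3, i.e. the new trailing dimension of the
    // rank-4 packed output, which is the non-negative form NNAPI accepts.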
5694     // Map inputs to NN API tensor indices.
5695     for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
5696       if (node->inputs->data[input_pos] != kTfLiteOptionalTensor &&
5697           context->tensors[node->inputs->data[input_pos]].type ==
5698               kTfLiteFloat16 &&
5699           IsConstantTensor(&context->tensors[node->inputs->data[input_pos]])) {
5700         input_tensor_flags |= NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION;
5701       }
5702       if (reg->builtin_code == kTfLiteBuiltinTransposeConv) {
5703         // Everything is added during Map since the input tensors
5704         // come in a different order.
5705         continue;
5706       }
5707       if (reg->builtin_code == kTfLiteBuiltinFullyConnected &&
5708           node->inputs->data[input_pos] == kTfLiteOptionalTensor) {
5709         // Skip the optional bias; it is handled during mapping.
5710         continue;
5711       }
5712       const auto input_index = node->inputs->data[input_pos];
5713       // Handle sparse weights for Conv2d.
5714       if (reg->builtin_code == kTfLiteBuiltinConv2d && input_pos == 1) {
5715         int densify_node_id = -1;
5716         bool should_dequantize = false;
5717         int dequantize_node_id =
5718             non_const_dequantize_output_to_node_mapping_[input_index];
5719         if (dequantize_node_id != -1) {
5720           should_dequantize = true;
5721           // Find densify->dequantize pattern.
5722           TfLiteNode* dequant_node;
5723           TfLiteRegistration* reg;
5724           TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
5725               context, dequantize_node_id, &dequant_node, &reg));
5726           densify_node_id =
5727               densify_output_to_node_mapping_[dequant_node->inputs->data[0]];
5728         } else {
5729           densify_node_id = densify_output_to_node_mapping_[input_index];
5730         }
5731         if (densify_node_id != -1) {
5732           TF_LITE_ENSURE_STATUS(DensifyAndDequantizeConstTensor(
5733               context, densify_node_id, should_dequantize, builder));
5734           continue;
5735         }
5736       }
5737       if (need_int8_conversion &&
5738           (input_pos == 0 ||
5739            reg->builtin_code == kTfLiteBuiltinFullyConnected ||
5740            reg->builtin_code == kTfLiteBuiltinConv2d ||
5741            reg->builtin_code == kTfLiteBuiltinDepthwiseConv2d ||
5742            reg->builtin_code == kTfLiteBuiltinAdd ||
5743            reg->builtin_code == kTfLiteBuiltinMul ||
5744            reg->builtin_code == kTfLiteBuiltinSub ||
5745            reg->builtin_code == kTfLiteBuiltinConcatenation ||
5746            reg->builtin_code == kTfLiteBuiltinMaximum ||
5747            reg->builtin_code == kTfLiteBuiltinMinimum ||
5748            reg->builtin_code == kTfLiteBuiltinLeakyRelu ||
5749            reg->builtin_code == kTfLiteBuiltinLess ||
5750            reg->builtin_code == kTfLiteBuiltinLessEqual ||
5751            reg->builtin_code == kTfLiteBuiltinPrelu ||
5752            reg->builtin_code == kTfLiteBuiltinGreater ||
5753            reg->builtin_code == kTfLiteBuiltinGreaterEqual ||
5754            reg->builtin_code == kTfLiteBuiltinEqual ||
5755            reg->builtin_code == kTfLiteBuiltinNotEqual ||
5756            reg->builtin_code == kTfLiteBuiltinSelect)) {
5757         // Only selected inputs require int8 conversion.
5758         TF_LITE_ENSURE_STATUS(builder.AddTensorInput(
5759             input_index, hybrid_op,
5760             input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION));
5761         continue;
5762       }
5763       if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
5764           input_pos >= 20) {
5765         // Skip layer normalization weights. They are added in the Map
5766         // function (after all the other inputs added there) since layer
5767         // normalization weights are the last four inputs of the LSTM op in
5768         // NNAPI.
5769         continue;
5770       }
5771       if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
5772         // Configuring all inputs in the Map function
5773         continue;
5774       }
5775       if (reg->builtin_code == kTfLiteBuiltinUnidirectionalSequenceLstm) {
5776         if (input_pos >= 20) {
5777           // Skip layer normalization weights. They are added in the Map
5778           // function (after all the other inputs added there) since layer
5779           // normalization weights are the last four inputs of the
5780           // unidirectional sequence LSTM op in NNAPI.
5781           continue;
5782         }
5783         if (input_index == kTfLiteOptionalTensor) {
5784           TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
5785           continue;
5786         }
5787       }
5788       if ((reg->builtin_code == kTfLiteBuiltinSplit) &&
5789           (input_index == node->inputs->data[0])) {
5790         // Skip the axis input tensor; it will be added as a scalar operand
5791         // by the Map() mapping.
5792         continue;
5793       }
5794 
5795       // Pad and Padv2 have an optional parameter for a pad value which has
5796       // to be converted to a scalar type in NN API.
5797       if ((reg->builtin_code == kTfLiteBuiltinPadv2 ||
5798            reg->builtin_code == kTfLiteBuiltinPad) &&
5799           node->inputs->size == 3 && input_pos == 2) {
5800         const int constant_value_id = node->inputs->data[2];
5801         if (constant_value_id == kTfLiteOptionalTensor) {
5802           continue;
5803         }
5804         const TfLiteTensor constant_value = context->tensors[constant_value_id];
5805 
5806         switch (constant_value.type) {
5807           case kTfLiteFloat32:
5808             if (constant_value.allocation_type == kTfLiteMmapRo) {
5809               builder.AddScalarFloat32Operand(*constant_value.data.f);
5810             } else {
5811               builder.AddSingleValueTensorAsScalarOperand(
5812                   constant_value_id, ANEURALNETWORKS_FLOAT32);
5813             }
5814             break;
5815           case kTfLiteUInt8:
5816             if (constant_value.allocation_type == kTfLiteMmapRo) {
5817               builder.AddScalarInt32Operand(
5818                   static_cast<int32_t>(*constant_value.data.uint8));
5819             } else {
5820               builder.AddSingleValueTensorAsScalarOperand(
5821                   constant_value_id, ANEURALNETWORKS_INT32);
5822             }
5823             break;
5824           case kTfLiteInt8:
5825             if (constant_value.allocation_type == kTfLiteMmapRo) {
5826               if (need_int8_conversion) {
5827                 builder.AddScalarInt32Operand(
5828                     static_cast<int32_t>(*constant_value.data.int8) + 128);
5829               } else {
5830                 builder.AddScalarInt32Operand(*constant_value.data.int8);
5831               }
5832             } else {
5833               builder.AddSingleValueTensorAsScalarOperand(
5834                   constant_value_id, ANEURALNETWORKS_INT32);
5835             }
5836             break;
5837           default:
5838             TF_LITE_KERNEL_LOG(context,
5839                                "Unsupported type of pad value for pad_v2\n");
5840             return kTfLiteError;
5841         }
5842         continue;
5843       }
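      // Worked example (illustrative): with need_int8_conversion set, a
      // constant int8 pad value of -1 is added as the scalar -1 + 128 == 127,
      // matching the int8 -> uint8 shift applied to the op's other inputs.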
5844 
5845       if (input_index == kTfLiteOptionalTensor &&
5846           (reg->builtin_code == kTfLiteBuiltinLstm ||
5847            reg->builtin_code == kTfLiteBuiltinSvdf ||
5848            reg->builtin_code == kTfLiteBuiltinBidirectionalSequenceLstm)) {
5849         // Properly handle the optional tensor for LSTM and SVDF.
5850         // Currently only float32 is supported.
5851         TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
5852       } else if (reg->builtin_code == kTfLiteBuiltinResizeBilinear ||
5853                  reg->builtin_code == kTfLiteBuiltinResizeNearestNeighbor) {
5854         if (input_pos == 0) {
5855           // Only the first input tensor is added. The second one,
5856           // specifying the output height and width, is not added and
5857           // instead the height and width will be added individually as
5858           // scalars by the mapping function returned by Map().
5859           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5860                                                        input_tensor_flags));
5861         }
5862       } else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
5863         // The K parameter tensor is not handled here but by the functor
5864         // returned by Map; the input tensor is instead added in
5865         // the else clause below.
5866         continue;
5867       } else if (reg->builtin_code == kTfLiteBuiltinGather) {
5868         // Everything else is added during Map since the input tensors
5869         // come in a different order.
5870         if (input_pos == 0) {
5871           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5872                                                        input_tensor_flags));
5873         }
5874         continue;
5875       } else if (reg->builtin_code == kTfLiteBuiltinExpandDims &&
5876                  input_pos == 1) {
5877         // The axis param is added during Map
5878         continue;
5879       } else if (reg->builtin_code == kTfLiteBuiltinBatchToSpaceNd &&
5880                  input_pos == 2) {
5881         // NNAPI does not support crops.
5882         // The Map function will check if all crops are zero.
5883         continue;
5884       } else if (reg->builtin_code == kTfLiteBuiltinArgMin ||
5885                  reg->builtin_code == kTfLiteBuiltinArgMax) {
5886         // The first input tensor is added as is. The second one, specifying
5887         // the axis, needs to be converted to a scalar since TFLite uses a
5888         // tensor but NNAPI uses a scalar as the axis.
5889         if (input_pos == 0) {
5890           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5891                                                        input_tensor_flags));
5892         } else {
5893           const int axis_id = node->inputs->data[1];
5894           const TfLiteTensor& axis_tensor = context->tensors[axis_id];
5895           switch (axis_tensor.type) {
5896             case kTfLiteInt32:
5897               if (axis_tensor.allocation_type == kTfLiteMmapRo) {
5898                 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
5899                     static_cast<int32_t>(*axis_tensor.data.i32)));
5900               } else {
5901                 TF_LITE_ENSURE_STATUS(
5902                     builder.AddSingleValueTensorAsScalarOperand(
5903                         axis_id, ANEURALNETWORKS_INT32));
5904               }
5905               break;
5906             case kTfLiteInt64:
5907               // Map() function already makes sure int64 input is constant.
5908               TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
5909                   static_cast<int32_t>(*axis_tensor.data.i64)));
5910               break;
5911             default:
5912               return kTfLiteError;
5913           }
5914         }
5915       } else if (reg->builtin_code == kTfLiteBuiltinMaximum ||
5916                  reg->builtin_code == kTfLiteBuiltinMinimum) {
5917         const TfLiteTensor& operand_tensor =
5918             context->tensors[node->inputs->data[input_pos]];
5919         if (operand_tensor.dims->size == 0) {
5920           int tensor_index;
5921 
5922           TF_LITE_ENSURE_EQ(context, operand_tensor.allocation_type,
5923                             kTfLiteMmapRo);
5924           switch (operand_tensor.type) {
5925             case kTfLiteFloat32:
5926               TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5927                   ANEURALNETWORKS_TENSOR_FLOAT32, operand_tensor.type, {1},
5928                   std::vector<float>(1, operand_tensor.data.f[0]),
5929                   operand_tensor.params, &tensor_index));
5930               break;
5931             case kTfLiteUInt8:
5932               TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5933                   ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type, {1},
5934                   std::vector<uint8_t>(1, operand_tensor.data.uint8[0]),
5935                   operand_tensor.params, &tensor_index));
5936               break;
5937             case kTfLiteInt8: {
5938               auto params = operand_tensor.params;
5939               if (params.scale == 0.0) {
5940                 params.scale = 1.0;
5941               }
5942 
5943               if (use_int8_asymm_signed) {
5944                 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5945                     ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,
5946                     operand_tensor.type, {1},
5947                     std::vector<int8_t>(1, operand_tensor.data.int8[0]), params,
5948                     &tensor_index));
5949               } else {
5950                 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5951                     ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type,
5952                     {1},
5953                     std::vector<int8_t>(1, operand_tensor.data.int8[0] + 128),
5954                     params, &tensor_index));
5955               }
5956             } break;
5957             case kTfLiteInt32:
5958               TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5959                   ANEURALNETWORKS_TENSOR_INT32, operand_tensor.type, {1},
5960                   std::vector<int32_t>(1, operand_tensor.data.i32[0]),
5961                   operand_tensor.params, &tensor_index));
5962               break;
5963             default:
5964               return kTfLiteError;
5965           }
5966         } else {
5967           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5968                                                        input_tensor_flags));
5969         }
5970       } else if ((reg->builtin_code == kTfLiteBuiltinReduceAny ||
5971                   reg->builtin_code == kTfLiteBuiltinReduceMax ||
5972                   reg->builtin_code == kTfLiteBuiltinReduceMin ||
5973                   reg->builtin_code == kTfLiteBuiltinReduceProd ||
5974                   reg->builtin_code == kTfLiteBuiltinSum ||
5975                   reg->builtin_code == kTfLiteBuiltinMean) &&
5976                  (input_pos == 1)) {
5977         // The axis needs to be converted to a tensor if specified as a scalar.
5978         const TfLiteTensor& axis_tensor =
5979             context->tensors[node->inputs->data[input_pos]];
5980         if (axis_tensor.dims->size == 0) {
5981           TF_LITE_ENSURE_STATUS(
5982               builder.AddVectorInt32Operand(axis_tensor.data.i32, 1));
5983         } else {
5984           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5985                                                        input_tensor_flags));
5986         }
5987       } else if (reg->builtin_code == kTfLiteBuiltinFill) {
5988         if (input_pos == 0) {
5989           const int dims_id = node->inputs->data[0];
5990           const TfLiteTensor& dims_tensor = context->tensors[dims_id];
5991           switch (dims_tensor.type) {
5992             case kTfLiteInt32:
5993               TF_LITE_ENSURE_STATUS(
5994                   builder.AddTensorInput(input_index, hybrid_op));
5995               break;
5996             case kTfLiteInt64: {
5997               // We made sure that dimensions are constant and fit into int32
5998               // in Map(), so we can safely create a new tensor with casted
5999               // values.
6000               const int dims_size = dims_tensor.dims->data[0];
6001               std::vector<int32_t> dims_int32(dims_size);
6002               std::copy(dims_tensor.data.i64, dims_tensor.data.i64 + dims_size,
6003                         dims_int32.begin());
6004               int new_tensor_index = -1;
6005               builder.AddNewInputConstantTensor(
6006                   ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, dims_tensor.dims,
6007                   dims_int32, dims_tensor.params, &new_tensor_index);
6008             } break;
6009             default:
6010               return kTfLiteError;
6011           }
6012         } else {
6013           const int value_id = node->inputs->data[1];
6014           const TfLiteTensor& value_tensor = context->tensors[value_id];
6015           switch (value_tensor.type) {
6016             case kTfLiteFloat32:
6017               if (value_tensor.allocation_type == kTfLiteMmapRo) {
6018                 TF_LITE_ENSURE_STATUS(
6019                     builder.AddScalarFloat32Operand(*value_tensor.data.f));
6020               } else {
6021                 TF_LITE_ENSURE_STATUS(
6022                     builder.AddSingleValueTensorAsScalarOperand(
6023                         value_id, ANEURALNETWORKS_FLOAT32));
6024               }
6025               break;
6026             case kTfLiteInt32:
6027               if (value_tensor.allocation_type == kTfLiteMmapRo) {
6028                 TF_LITE_ENSURE_STATUS(
6029                     builder.AddScalarInt32Operand(*value_tensor.data.i32));
6030               } else {
6031                 TF_LITE_ENSURE_STATUS(
6032                     builder.AddSingleValueTensorAsScalarOperand(
6033                         value_id, ANEURALNETWORKS_INT32));
6034               }
6035               break;
6036             case kTfLiteInt64:
6037               if (value_tensor.allocation_type == kTfLiteMmapRo) {
6038                 // Map() function already makes sure const int64 input fits into
6039                 // int32.
6040                 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
6041                     static_cast<int32_t>(*value_tensor.data.i64)));
6042               } else {
6043                 TF_LITE_ENSURE_STATUS(
6044                     builder.AddSingleValueTensorAsScalarOperand(
6045                         value_id, ANEURALNETWORKS_INT32));
6046               }
6047               break;
6048             default:
6049               return kTfLiteError;
6050           }
6051         }
6052       } else {
6053         TF_LITE_ENSURE_STATUS(
6054             builder.AddTensorInput(input_index, hybrid_op, input_tensor_flags));
6055       }
6056     }
6057 
6058     // Get the op type and operands.
6059     // This fails if the Validate function failed.
6060     int nn_op_type;
6061     TF_LITE_ENSURE_STATUS(
6062         Map(context, reg->builtin_code, reg->version, target_feature_level_,
6063             {context, &builder, node, node_index, &model_state_outputs_,
6064              &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
6065             &nn_op_type));
6066 
6067     // Map outputs to NN API tensor indices.
6068     int output_tensor_flags = 0;
6069     if (need_int8_conversion) {
6070       output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
6071     }
6072     if (use_int8_asymm_signed) {
6073       output_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
6074     }
6075     // fc_nn_intermediate_output_index is used to indicate whether an
6076     // additional RESHAPE op is needed.
6077     int fc_nn_intermediate_output_index = -1;
6078     // mean_nn_intermediate_output_index is used to indicate whether additional
6079     // re-quantization is needed.
6080     int mean_nn_intermediate_output_index = -1;
6081     for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
6082       auto output_index = node->outputs->data[output_pos];
6083 
6084       // Outputs for the basic LSTM cell are set in the Map function; skip them here.
6085       if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
6086         continue;
6087       }
6088       // Handle FC with keep_num_dims==true.
6089       if (reg->builtin_code == kTfLiteBuiltinFullyConnected &&
6090           reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data)
6091               ->keep_num_dims) {
6092         auto& output_tensor = context->tensors[output_index];
6093 
6094         int num_units = output_tensor.dims->data[output_tensor.dims->size - 1];
6095         std::vector<uint32_t> output_dims(2);
6096         output_dims[0] = NumElements(output_tensor.dims) / num_units;
6097         output_dims[1] = num_units;
6098         TF_LITE_ENSURE_STATUS(builder.AddIntermediateOutputTensor(
6099             output_tensor.type, output_dims.size(), output_dims.data(),
6100             output_tensor.params.scale, output_tensor.params.zero_point,
6101             &fc_nn_intermediate_output_index));
6102       } else if (reg->builtin_code == kTfLiteBuiltinMean &&
6103                  IsMeanWithDifferentInputOutputQuantization(context, node)) {
6104         // Handle MEAN with different input and output quantization params.
6105         auto& input_tensor = context->tensors[node->inputs->data[0]];
6106         auto& output_tensor = context->tensors[output_index];
6107         TF_LITE_ENSURE_STATUS(builder.AddIntermediateOutputTensor(
6108             output_tensor.type, output_tensor.dims->size,
6109             reinterpret_cast<const uint32_t*>(output_tensor.dims->data),
6110             input_tensor.params.scale, input_tensor.params.zero_point,
6111             &mean_nn_intermediate_output_index, need_int8_conversion));
6112       } else {
6113         TF_LITE_ENSURE_STATUS(
6114             builder.AddTensorOutput(output_index, output_tensor_flags));
6115       }
6116     }
6117 
6118     // Dequantize operators may have to be added in case inputs need to be
6119     // floating point.
6120     AddDequantizeOperatorsWhereNeeded(context, reg->builtin_code, node,
6121                                       node_index, &builder, nnapi_errno);
6122 
6123     TF_LITE_ENSURE_OK(context_,
6124                       builder.FinalizeAddOperation(nn_op_type, node_index));
6125     if (fc_nn_intermediate_output_index > -1) {
6126       TF_LITE_ENSURE_STATUS(builder.AppendReshape(
6127           fc_nn_intermediate_output_index, node->outputs->data[0], node_index));
6128     }
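    // Worked example (illustrative): for a fully connected op with
    // keep_num_dims == true and a TFLite output of shape [2, 3, 4]
    // (num_units == 4), the NNAPI op writes into an intermediate tensor of
    // shape [24 / 4, 4] == [6, 4]; the RESHAPE appended above restores the
    // original [2, 3, 4] output shape.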
6129     if (mean_nn_intermediate_output_index > -1) {
6130       TF_LITE_ENSURE_STATUS(builder.AppendRequantize(
6131           mean_nn_intermediate_output_index, node->outputs->data[0], node_index,
6132           output_tensor_flags));
6133     }
6134   }
6135   return kTfLiteOk;
6136 }
6137 
6138 TfLiteStatus NNAPIDelegateKernel::BuildGraph(
6139     TfLiteContext* context,
6140     const StatefulNnApiDelegate::Options& delegate_options,
6141     const TfLiteIntArray* input_tensors, const TfLiteIntArray* output_tensors,
6142     int* nnapi_errno) {
6143   // Build the ops and tensors.
6144   TF_LITE_ENSURE_STATUS(AddOpsAndTensors(
6145       context, nnapi_errno, delegate_options.allow_dynamic_dimensions));
6146   // Map input and output tensor indices to ANN tensor indices.
6147   std::vector<uint32_t> inputs;
6148   inputs.reserve(input_tensors->size);
6149   std::vector<uint32_t> outputs;
6150   outputs.reserve(output_tensors->size);
6151 
6152   size_t total_input_byte_size = 0;
6153   // Map the TensorFlow Lite inputs and outputs to ann_indices.
6154   for (int i : TfLiteIntArrayView(input_tensors)) {
6155     // Constant tensors are not NNAPI inputs.
6156     if (i != kTfLiteOptionalTensor &&
6157         context->tensors[i].allocation_type != kTfLiteMmapRo &&
6158         // The delegate might not have mapped this input (this can
6159         // happen if one tensor is split into several ones).
6160         mapping_util_->TfLiteIndexToNnIndex(mapping_util_.get(), i) != -1) {
6161       inputs.push_back(
6162           mapping_util_->TfLiteIndexToNnIndex(mapping_util_.get(), i));
6163       if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
6164         continue;
6165       }
6166       const TfLiteType nn_type_conversion =
6167           mapping_util_->TfLiteIndexToNnTypeConversion(mapping_util_.get(), i);
6168       int tensor_size = 0;
6169       if (nn_type_conversion == kTfLiteNoType) {
6170         tensor_size =
6171             std::max(context->tensors[i].bytes, tensor_max_size_hints_[i]);
6172       } else {
6173         size_t type_size;
6174         TF_LITE_ENSURE_OK(
6175             context, GetSizeOfType(context, nn_type_conversion, &type_size));
6176         tensor_size = NumElements(&context->tensors[i]) * type_size;
6177       }
6178       total_input_byte_size += tensor_size;
6179       total_input_byte_size += GetNumPaddingBytes(tensor_size);
6180     }
6181   }
6182 
6183   size_t total_output_byte_size = 0;
6184   for (int i : TfLiteIntArrayView(output_tensors)) {
6185     const int output_tensor_ann_index =
6186         mapping_util_->TfLiteIndexToNnIndex(mapping_util_.get(), i);
6187     // Unmapped outputs are not added
6188     if (output_tensor_ann_index != -1) {
6189       outputs.push_back(output_tensor_ann_index);
6190     }
6191     if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
6192       continue;
6193     }
6194     size_t tensor_size =
6195         std::max(context->tensors[i].bytes, tensor_max_size_hints_[i]);
6196     total_output_byte_size += tensor_size;
6197     total_output_byte_size += GetNumPaddingBytes(tensor_size);
6198   }
6199 
6200   // Add state output tensors as model outputs.
6201   for (int i = 0; i < model_state_outputs_.size(); i++) {
6202     outputs.push_back(model_state_outputs_[i]);
6203     auto tfl_state_idx = model_state_tfl_inputs_[i];
6204     total_output_byte_size += context->tensors[tfl_state_idx].bytes;
6205     total_output_byte_size +=
6206         GetNumPaddingBytes(context->tensors[tfl_state_idx].bytes);
6207   }
6208 
6209   // Tell ANN to declare inputs/outputs
6210   RETURN_TFLITE_ERROR_IF_NN_ERROR(
6211       context,
6212       nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs(
6213           nn_model_.get(), inputs.size(), inputs.data(), outputs.size(),
6214           outputs.data()),
6215       "identifying model inputs and outputs", nnapi_errno);
6216 
6217   auto allow_fp16 =
6218       context->allow_fp32_relax_to_fp16 | delegate_options.allow_fp16;
6219   if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI11) {
6220     RETURN_TFLITE_ERROR_IF_NN_ERROR(
6221         context,
6222         nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16(
6223             nn_model_.get(), allow_fp16),
6224         "set relaxed computation mode for fp32 if possible", nnapi_errno);
6225   }
6226 
6227   RETURN_TFLITE_ERROR_IF_NN_ERROR(
6228       context, nnapi_->ANeuralNetworksModel_finish(nn_model_.get()),
6229       "finalizing the model", nnapi_errno);
6230 
6231   // Create shared memory pool for inputs and outputs.
6232   nn_input_memory_ =
6233       std::make_unique<NNMemory>(nnapi_, "input_pool", total_input_byte_size);
6234   nn_output_memory_ =
6235       std::make_unique<NNMemory>(nnapi_, "output_pool", total_output_byte_size);
6236 
6237   return kTfLiteOk;
6238 }
6239 
6240 void NNAPIDelegateKernel::LogCompilationInfoOnce(
6241     const NnApi* nnapi, const ANeuralNetworksDiagnosticCompilationInfo* info) {
6242   TFLITE_LOG_PROD_ONCE(TFLITE_LOG_INFO,
6243                        "NNAPI SL compilation callback called.");
6244 
6245   const int32_t session_id =
6246       nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_getSessionId(info);
6247   const int32_t error_code =
6248       nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_getErrorCode(info);
6249   const uint64_t compilation_time_ns =
6250       nnapi
6251           ->SL_ANeuralNetworksDiagnosticCompilationInfo_getCompilationTimeNanos(
6252               info);
6253   const int64_t nnapi_version =
6254       nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_getNnApiVersion(info);
6255   const uint8_t model_arch_hash_first_byte =
6256       *nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_getModelArchHash(
6257           info);
6258   const std::string device_ids_string = std::string(
6259       nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_getDeviceIds(info));
6260   const ANeuralNetworksDiagnosticDataClass input_data_class =
6261       nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_getInputDataClass(
6262           info);
6263   const ANeuralNetworksDiagnosticDataClass output_data_class =
6264       nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_getOutputDataClass(
6265           info);
6266   const bool is_caching_enabled =
6267       nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_isCachingEnabled(info);
6268   const bool is_control_flow_used =
6269       nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_isControlFlowUsed(
6270           info);
6271 
6272   TFLITE_LOG_PROD_ONCE(
6273       TFLITE_LOG_INFO,
6274       "Compilation info: getSessionId=%d getErrorCode=%d "
6275       "getCompilationTimeNanos=%" PRIu64 " getNnApiVersion=%" PRId64
6276       " getDeviceIds=%s getModelArchHash=%x getInputDataClass=%d "
6277       "getOutputDataClass=%d isCachingEnabled=%s isControlFlowUsed=%s",
6278       session_id, error_code, compilation_time_ns, nnapi_version,
6279       device_ids_string.c_str(), unsigned{model_arch_hash_first_byte},
6280       input_data_class, output_data_class, is_caching_enabled ? "Y" : "N",
6281       is_control_flow_used ? "Y" : "N");
6282 }
6283 
6284 void NNAPIDelegateKernel::LogExecutionInfoOnce(
6285     const NnApi* nnapi, const ANeuralNetworksDiagnosticExecutionInfo* info) {
6286   TFLITE_LOG_PROD_ONCE(TFLITE_LOG_INFO, "NNAPI SL execution callback called.");
6287 
6288   const int32_t session_id =
6289       nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getSessionId(info);
6290 
6291   const int32_t error_code =
6292       nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getErrorCode(info);
6293 
6294   const int64_t nnapi_version =
6295       nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getNnApiVersion(info);
6296 
6297   const uint8_t model_arch_hash_first_byte =
6298       *nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getModelArchHash(info);
6299   const std::string device_ids_string = std::string(
6300       nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getDeviceIds(info));
6301   const ANeuralNetworksDiagnosticDataClass input_data_class =
6302       nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getInputDataClass(info);
6303   const ANeuralNetworksDiagnosticDataClass output_data_class =
6304       nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getOutputDataClass(info);
6305   const bool is_caching_enabled =
6306       nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_isCachingEnabled(info);
6307   const bool is_control_flow_used =
6308       nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_isControlFlowUsed(info);
6309   const ANeuralNetworksDiagnosticExecutionMode execution_mode =
6310       nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getExecutionMode(info);
6311 
6312   const uint64_t runtime_time_ns =
6313       nnapi
6314           ->SL_ANeuralNetworksDiagnosticExecutionInfo_getRuntimeExecutionTimeNanos(  // NOLINT line too long
6315               info);
6316 
6317   const uint64_t driver_time_ns =
6318       nnapi
6319           ->SL_ANeuralNetworksDiagnosticExecutionInfo_getDriverExecutionTimeNanos(  // NOLINT line too long
6320               info);
6321 
6322   const uint64_t hardware_time_ns =
6323       nnapi
6324           ->SL_ANeuralNetworksDiagnosticExecutionInfo_getHardwareExecutionTimeNanos(  // NOLINT line too long
6325               info);
6326 
6327   TFLITE_LOG_PROD_ONCE(
6328       TFLITE_LOG_INFO,
6329       "Execution info: getSessionId=%d getErrorCode=%d "
6330       "getNnApiVersion=%" PRId64
6331       " getModelArchHash=%x getDeviceIds=%s getInputDataClass=%d "
6332       "getOutputDataClass=%d isCachingEnabled=%s isControlFlowUsed=%s "
6333       "getExecutionMode=%d getRuntimeExecutionTimeNanos=%" PRIu64
6334       " getDriverExecutionTimeNanos=%" PRIu64
6335       " getHardwareExecutionTimeNanos=%" PRIu64,
6336       session_id, error_code, nnapi_version,
6337       unsigned{model_arch_hash_first_byte}, device_ids_string.c_str(),
6338       input_data_class, output_data_class, is_caching_enabled ? "Y" : "N",
6339       is_control_flow_used ? "Y" : "N", execution_mode, runtime_time_ns,
6340       driver_time_ns, hardware_time_ns);
6341 }
6342 
6343 }  // namespace nnapi
6344 }  // namespace delegate
6345 
6346 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI;
6347 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI11;
6348 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;
6349 using ::tflite::delegate::nnapi::NNAPIDelegateKernel;
6350 
6351 StatefulNnApiDelegate::Data::Data(const NnApi* nnapi) : nnapi(nnapi) {}
6352 StatefulNnApiDelegate::Data::Data(std::unique_ptr<const NnApi> nnapi)
6353     : nnapi(nnapi.get()), owned_nnapi(std::move(nnapi)) {}
6354 
6355 StatefulNnApiDelegate::Data::~Data() {
6356   std::for_each(std::begin(delegate_state_cache),
6357                 std::end(delegate_state_cache),
6358                 [](const std::pair<int, NNAPIDelegateKernel*>& entry) {
6359                   delete entry.second;
6360                 });
6361 }
6362 
6363 void StatefulNnApiDelegate::Data::CacheDelegateKernel(
6364     const TfLiteDelegateParams* delegate_params,
6365     NNAPIDelegateKernel* delegate_state) {
6366   const int cache_key = delegate_params->nodes_to_replace->data[0];
6367   delegate_state_cache.emplace(cache_key, delegate_state);
6368 }
6369 
6370 NNAPIDelegateKernel* StatefulNnApiDelegate::Data::MaybeGetCachedDelegateKernel(
6371     const TfLiteDelegateParams* delegate_params) {
6372   const int cache_key = delegate_params->nodes_to_replace->data[0];
6373   const auto cached_state = delegate_state_cache.find(cache_key);
6374   if (cached_state != std::end(delegate_state_cache)) {
6375     auto result = cached_state->second;
6376     delegate_state_cache.erase(cached_state);
6377     return result;
6378   } else {
6379     return nullptr;
6380   }
6381 }
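
// Illustrative note (not in the original source): the cache key is the first
// node index of the partition, so a kernel cached for a partition whose
// nodes_to_replace starts with node 7 is looked up under key 7. The lookup
// erases the entry, so each cached kernel is handed out at most once.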
6382 
6383 void StatefulNnApiDelegate::StatefulNnApiDelegateConstructorImpl(
6384     const Options& options) {
6385   if (options.accelerator_name) {
6386     delegate_data_.accelerator_name = options.accelerator_name;
6387   }
6388   if (options.cache_dir) {
6389     delegate_data_.cache_dir = options.cache_dir;
6390   }
6391   if (options.model_token) {
6392     delegate_data_.model_token = options.model_token;
6393   }
6394   delegate_data_.execution_preference = options.execution_preference;
6395   delegate_data_.disallow_nnapi_cpu = options.disallow_nnapi_cpu;
6396   delegate_data_.max_number_delegated_partitions =
6397       options.max_number_delegated_partitions;
6398   delegate_data_.allow_fp16 = options.allow_fp16;
6399   delegate_data_.execution_priority = options.execution_priority;
6400   delegate_data_.max_compilation_timeout_duration_ns =
6401       options.max_compilation_timeout_duration_ns;
6402   delegate_data_.max_execution_timeout_duration_ns =
6403       options.max_execution_timeout_duration_ns;
6404   delegate_data_.max_execution_loop_timeout_duration_ns =
6405       options.max_execution_loop_timeout_duration_ns;
6406   if (delegate_data_.nnapi->android_sdk_version >= kMinSdkVersionForNNAPI11) {
6407     delegate_data_.allow_dynamic_dimensions = options.allow_dynamic_dimensions;
6408   }
6409   delegate_data_.use_burst_computation = options.use_burst_computation;
6410   delegate_data_.vendor_compilation_hints = options.vendor_compilation_hints;
6411   delegate_data_.vendor_execution_hints = options.vendor_execution_hints;
6412   delegate_data_.vendor_plugin = options.vendor_plugin;
6413   delegate_data_.max_execution_cache_size = options.max_execution_cache_size;
6414   delegate_data_.tensor_max_size_hints = options.tensor_max_size_hints;
6415 
6416   TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
6417                        "Created TensorFlow Lite delegate for NNAPI.");
6418   Prepare = DoPrepare;
6419   CopyFromBufferHandle = DoCopyFromBufferHandle;
6420   CopyToBufferHandle = DoCopyToBufferHandle;
6421   FreeBufferHandle = DoFreeBufferHandle;
6422   data_ = &delegate_data_;
6423   if (delegate_data_.allow_dynamic_dimensions) {
6424     flags |= kTfLiteDelegateFlagsAllowDynamicTensors;
6425     // TFLite cannot propagate tensor shapes if custom operators are used.
6426     if (!delegate_data_.vendor_plugin) {
6427       flags |= kTfLiteDelegateFlagsRequirePropagatedShapes;
6428     }
6429   }
6430 }
6431 
6432 StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi)
6433     : StatefulNnApiDelegate(nnapi, Options()) {}
6434 
6435 StatefulNnApiDelegate::StatefulNnApiDelegate(Options options)
6436     : StatefulNnApiDelegate(NnApiImplementation(), options) {}
6437 
6438 StatefulNnApiDelegate::StatefulNnApiDelegate(
6439     const NnApiSLDriverImplFL5* nnapi_support_library_driver, Options options)
6440     : TfLiteDelegate(TfLiteDelegateCreate()),
6441       delegate_data_(
6442           CreateNnApiFromSupportLibrary(nnapi_support_library_driver)) {
6443   StatefulNnApiDelegateConstructorImpl(options);
6444 }
6445 
6446 StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi,
6447                                              Options options)
6448     : TfLiteDelegate(TfLiteDelegateCreate()), delegate_data_(nnapi) {
6449   StatefulNnApiDelegateConstructorImpl(options);
6450 }
6451 
6452 StatefulNnApiDelegate::StatefulNnApiDelegate()
6453     : StatefulNnApiDelegate(Options()) {}
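
// Example usage (a minimal sketch, not part of this file): applying the
// delegate to an already-built tflite::Interpreter named `interpreter`.
// The accelerator name below is a placeholder.
//
//   tflite::StatefulNnApiDelegate::Options options;
//   options.accelerator_name = "example-accelerator";  // hypothetical name
//   options.allow_fp16 = true;
//   tflite::StatefulNnApiDelegate delegate(options);
//   if (interpreter->ModifyGraphWithDelegate(&delegate) != kTfLiteOk) {
//     // Fall back to CPU execution.
//   }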
6454 
6455 const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions(
6456     TfLiteDelegate* delegate) {
6457   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
6458   StatefulNnApiDelegate::Options options;
6459   options.execution_preference = delegate_data->execution_preference;
6460   options.accelerator_name = delegate_data->accelerator_name.empty()
6461                                  ? nullptr
6462                                  : delegate_data->accelerator_name.c_str();
6463   options.cache_dir = delegate_data->cache_dir.empty()
6464                           ? nullptr
6465                           : delegate_data->cache_dir.c_str();
6466   options.model_token = delegate_data->model_token.empty()
6467                             ? nullptr
6468                             : delegate_data->model_token.c_str();
6469   options.disallow_nnapi_cpu = delegate_data->disallow_nnapi_cpu;
6470   options.max_number_delegated_partitions =
6471       delegate_data->max_number_delegated_partitions;
6472   options.allow_fp16 = delegate_data->allow_fp16;
6473   options.execution_priority = delegate_data->execution_priority;
6474   options.max_compilation_timeout_duration_ns =
6475       delegate_data->max_compilation_timeout_duration_ns;
6476   options.max_execution_timeout_duration_ns =
6477       delegate_data->max_execution_timeout_duration_ns;
6478   options.max_execution_loop_timeout_duration_ns =
6479       delegate_data->max_execution_loop_timeout_duration_ns;
6480   options.allow_dynamic_dimensions = delegate_data->allow_dynamic_dimensions;
6481   options.use_burst_computation = delegate_data->use_burst_computation;
6482   options.vendor_compilation_hints = delegate_data->vendor_compilation_hints;
6483   options.vendor_execution_hints = delegate_data->vendor_execution_hints;
6484   options.vendor_plugin = delegate_data->vendor_plugin;
6485   options.max_execution_cache_size = delegate_data->max_execution_cache_size;
6486   options.tensor_max_size_hints = delegate_data->tensor_max_size_hints;
6487   return options;
6488 }
6489 
6490 const std::vector<StatefulNnApiDelegate::MemoryRegistration>&
6491 StatefulNnApiDelegate::GetTensorMemoryMap(TfLiteDelegate* delegate) {
6492   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
6493   return delegate_data->tensor_memory_map;
6494 }
6495 
6496 delegates::Serialization* StatefulNnApiDelegate::GetCache(
6497     TfLiteDelegate* delegate) {
6498   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
6499   return delegate_data->cache.get();
6500 }
6501 
6502 TfLiteBufferHandle StatefulNnApiDelegate::RegisterNnapiMemory(
6503     ANeuralNetworksMemory* memory, CopyToHostTensorFnPtr callback,
6504     void* callback_context) {
6505   uint64_t timestamp = delegate_data_.next_buffer_handle_timestamp++;
6506   int map_size = delegate_data_.tensor_memory_map.size();
6507   for (int i = 0; i < map_size; i++) {
6508     if (delegate_data_.tensor_memory_map[i].memory == nullptr) {
6509       delegate_data_.tensor_memory_map[i] = {memory, callback, callback_context,
6510                                              timestamp};
6511       return i;
6512     }
6513   }
6514   delegate_data_.tensor_memory_map.push_back(
6515       {memory, callback, callback_context, timestamp});
6516   return map_size;
6517 }
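
// Example usage (a minimal sketch, not part of this file): registering an
// NNAPI memory object and binding it to a tensor. `size`, `fd`,
// `tensor_index`, `CopyOutputToHost` and `staging_buffer` are placeholders
// supplied by the caller.
//
//   ANeuralNetworksMemory* memory = nullptr;
//   ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE, fd,
//                                      /*offset=*/0, &memory);
//   TfLiteBufferHandle handle = delegate.RegisterNnapiMemory(
//       memory, CopyOutputToHost, /*callback_context=*/staging_buffer);
//   interpreter->SetBufferHandle(tensor_index, handle, &delegate);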
6518 
6519 TfLiteStatus StatefulNnApiDelegate::DoCopyFromBufferHandle(
6520     TfLiteContext* context, TfLiteDelegate* delegate,
6521     TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) {
6522   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
6523   if (buffer_handle < 0 ||
6524       buffer_handle >= delegate_data->tensor_memory_map.size()) {
6525     return kTfLiteError;
6526   }
6527   auto memory = delegate_data->tensor_memory_map[buffer_handle].memory;
6528   auto callback = delegate_data->tensor_memory_map[buffer_handle].callback;
6529   auto callback_context =
6530       delegate_data->tensor_memory_map[buffer_handle].callback_context;
6531   if (!memory || !callback) {
6532     return kTfLiteError;
6533   }
6534   return callback(tensor, memory, 0, tensor->bytes, callback_context);
6535 }
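
// A minimal sketch (not part of this file) of a CopyToHostTensorFnPtr
// callback compatible with the call above, which always passes offset 0 and
// tensor->bytes. It assumes the caller provided a host-visible staging
// buffer as `callback_context` and does not dereference the
// ANeuralNetworksMemory itself.
//
//   TfLiteStatus CopyOutputToHost(TfLiteTensor* tensor,
//                                 ANeuralNetworksMemory* /*memory*/,
//                                 size_t offset, size_t byte_size,
//                                 void* callback_context) {
//     const auto* staging = static_cast<const uint8_t*>(callback_context);
//     std::memcpy(tensor->data.raw, staging + offset, byte_size);
//     return kTfLiteOk;
//   }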
6536 
6537 TfLiteStatus StatefulNnApiDelegate::DoCopyToBufferHandle(
6538     TfLiteContext* context, TfLiteDelegate* delegate,
6539     TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) {
6540   return kTfLiteError;
6541 }
6542 
6543 void StatefulNnApiDelegate::DoFreeBufferHandle(TfLiteContext* context,
6544                                                TfLiteDelegate* delegate,
6545                                                TfLiteBufferHandle* handle) {
6546   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
6547   if (*handle >= 0 && *handle < delegate_data->tensor_memory_map.size()) {
6548     delegate_data->tensor_memory_map[*handle] = {nullptr, nullptr, nullptr};
6549     *handle = kTfLiteNullBufferHandle;
6550   }
6551 }
6552 
6553 int StatefulNnApiDelegate::GetNnApiErrno() const {
6554   return delegate_data_.nnapi_errno;
6555 }
6556 
6557 // static
6558 TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator(
6559     TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
6560     const std::vector<int>& supported_nodes,
6561     std::vector<int>* device_supported_nodes, int* num_partitions,
6562     TfLiteDelegateParams** params_array, int* nnapi_errno) {
6563   auto* delegate_data = static_cast<Data*>(delegate->data_);
6564   // The first entry in the array is the element count
6565 
6566   auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
6567   TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
6568       context, supported_nodes_int_array.get(), params_array, num_partitions));
6569   // For each partition, check which nodes are actually supported by the
6570   // target accelerators.
6571   delegate_data->delegate_state_cache.clear();
6572   for (int idx = 0; idx < *num_partitions; idx++) {
6573     const auto& partition_params = (*params_array)[idx];
6574     std::unique_ptr<NNAPIDelegateKernel> kernel_state(
6575         new NNAPIDelegateKernel(nnapi, delegate_data->vendor_plugin));
6576     TfLiteDelegateParams params_with_delegate = partition_params;
6577     params_with_delegate.delegate = delegate;
6578     TF_LITE_ENSURE_STATUS(
6579         kernel_state->Init(context, &params_with_delegate, nnapi_errno));
6580     std::vector<int> supported_partition_nodes;
6581     TF_LITE_ENSURE_STATUS(
6582         kernel_state->GetOperationsSupportedByTargetNnApiDevices(
6583             context, &supported_partition_nodes, nnapi_errno));
6584     device_supported_nodes->insert(device_supported_nodes->end(),
6585                                    supported_partition_nodes.begin(),
6586                                    supported_partition_nodes.end());
6587 
6588     bool model_fully_supported = (supported_partition_nodes.size() ==
6589                                   partition_params.nodes_to_replace->size);
6590     if (model_fully_supported) {
6591       delegate_data->CacheDelegateKernel(&partition_params,
6592                                          kernel_state.release());
6593     }
6594   }
6595 
6596   if (device_supported_nodes->size() != supported_nodes.size()) {
6597     // We changed the set of nodes to delegate; this will create a different
6598     // partitioning layout.
6599     auto device_sup_nodes_int_array =
6600         BuildTfLiteIntArray(*device_supported_nodes);
6601     TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
6602         context, device_sup_nodes_int_array.get(), params_array,
6603         num_partitions));
6604   }
6605 
6606   return kTfLiteOk;
6607 }
6608 
6609 // static
6610 TfLiteStatus StatefulNnApiDelegate::LimitDelegatedPartitions(
6611     int max_partitions,
6612     std::vector<TfLiteDelegateParams> partition_params_array,
6613     std::vector<int>* nodes_to_delegate) {
6614   int num_partitions = partition_params_array.size();
6615   if (max_partitions <= 0 || num_partitions <= max_partitions) {
6616     return kTfLiteOk;
6617   }
6618 
6619   int number_delegated_partitions = std::count_if(
6620       partition_params_array.begin(), partition_params_array.end(),
6621       [nodes_to_delegate](const TfLiteDelegateParams& partition_params) {
6622         return std::find(nodes_to_delegate->begin(), nodes_to_delegate->end(),
6623                          partition_params.nodes_to_replace->data[0]) !=
6624                nodes_to_delegate->end();
6625       });
6626 
6627   if (number_delegated_partitions > max_partitions) {
6628     std::sort(partition_params_array.begin(), partition_params_array.end(),
6629               [](const TfLiteDelegateParams& left,
6630                  const TfLiteDelegateParams& right) -> bool {
6631                 // Reverse sort
6632                 return left.nodes_to_replace->size >
6633                        right.nodes_to_replace->size;
6634               });
6635 
6636     nodes_to_delegate->clear();
6637 
6638     for (int i = 0; i < max_partitions; i++) {
6639       const TfLiteDelegateParams& partition_params = partition_params_array[i];
6640 
6641       nodes_to_delegate->insert(nodes_to_delegate->end(),
6642                                 partition_params.nodes_to_replace->data,
6643                                 partition_params.nodes_to_replace->data +
6644                                     partition_params.nodes_to_replace->size);
6645     }
6646   }
6647 
6648   return kTfLiteOk;
6649 }
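
// Worked example (illustrative): if the partitioning produced partitions of
// 5, 3 and 2 nodes, all currently delegated, and max_partitions == 2, the
// array is sorted by descending node count and nodes_to_delegate is rebuilt
// from the 5-node and 3-node partitions only.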
6650 
6651 static std::vector<int> GetSupportedOpsWithFp16WeightRemapping(
6652     TfLiteContext* context, int target_feature_level,
6653     bool is_accelerator_specified, int max_number_delegated_partitions) {
6654   std::vector<int> supported_nodes;
6655   delegates::IsNodeSupportedFn node_supported_fn =
6656       [=](TfLiteContext* context, TfLiteNode* node,
6657           TfLiteRegistration* registration,
6658           std::string* unsupported_details) -> bool {
6659     std::vector<delegate::nnapi::NNAPIValidationFailure> map_failures;
6660     const auto is_supported = NNAPIDelegateKernel::Validate(
6661         context, registration, target_feature_level, node,
6662         is_accelerator_specified, nullptr, &map_failures);
6663     if (!is_supported) {
6664       if (unsupported_details) {
6665         for (auto& failure : map_failures) {
6666           unsupported_details->append(failure.message.c_str());
6667         }
6668       }
6669       return false;
6670     }
6671     return true;
6672   };
6673 
6674   delegates::FP16GraphPartitionHelper partition_helper(context,
6675                                                        node_supported_fn);
6676   std::set<std::string> unsupported_nodes_info;
6677   if (partition_helper.Partition(&unsupported_nodes_info) == kTfLiteOk) {
6678     supported_nodes = partition_helper.GetNodesOfFirstNLargestPartitions();
6679   }
6680   return supported_nodes;
6681 }
6682 
6683 TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
6684                                               TfLiteDelegate* delegate) {
6685   auto* delegate_data = static_cast<Data*>(delegate->data_);
6686   int* nnapi_errno = &(delegate_data->nnapi_errno);
6687   const NnApi* nnapi = delegate_data->nnapi;
6688 
6689   // Reset the error code when the delegate is initialized by TFLite. This
6690   // causes the error to be cleared when the same StatefulNnApiDelegate is
6691   // reused after a failure.
6692   *nnapi_errno = 0;
6693 
6694   // Do not check nodes_ if NN API is unavailable.
6695   if (nnapi->android_sdk_version < kMinSdkVersionForNNAPI ||
6696       !nnapi->nnapi_exists) {
6697     return kTfLiteOk;
6698   }
6699 
6700   int target_feature_level = nnapi->android_sdk_version;
6701   const StatefulNnApiDelegate::Options delegate_options =
6702       StatefulNnApiDelegate::GetOptions(delegate);
6703   // For NNAPI 1.2+, check if there is any accelerator available.
6704   // If not, don't delegate to NNAPI's CPU reference implementation unless
6705   // it has been specified as target accelerator.
6706   if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
6707     if (ShouldUseTargetDevices(delegate_options, nnapi)) {
6708       std::vector<ANeuralNetworksDevice*> devices;
6709       TF_LITE_ENSURE_STATUS(
6710           GetTargetDevices(context, delegate, nnapi, nnapi_errno, &devices));
6711 
6712       if (devices.empty()) {
6713         if (delegate_options.accelerator_name) {
6714           // There was a selected device and it is not available.
6715           return kTfLiteError;
6716         } else {
6717           // Only nnapi-reference is available but was disabled by the delegate
6718           // options
6719           return kTfLiteOk;
6720         }
6721       }
6722 
6723       TF_LITE_ENSURE_STATUS(GetTargetFeatureLevel(
6724           context, nnapi, devices, &target_feature_level, nnapi_errno));
6725     } else {
6726       // If no accelerator is specified, only use NNAPI if an accelerator is
6727       // available. Any available accelerator will make the device_count larger
6728       // than 1. More sophisticated check and allowlisting can be added later.
6729       uint32_t device_count = 0;
6730       RETURN_TFLITE_ERROR_IF_NN_ERROR(
6731           context, nnapi->ANeuralNetworks_getDeviceCount(&device_count),
6732           "getting number of NNAPI devices", nnapi_errno);
6733       if (device_count <= 1) {
6734         return kTfLiteOk;
6735       }
6736     }
6737   }
6738 
6739   std::vector<int> supported_nodes;
6740   // We don't care about all nodes_; we only care about the ones in the
6741   // current plan.
6742   TfLiteIntArray* execution_plan;
6743   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &execution_plan));
6744   // Copy the execution plan and wrap it with unique_ptr.
6745   std::unique_ptr<TfLiteIntArray, decltype(&TfLiteIntArrayFree)> plan(
6746       TfLiteIntArrayCopy(execution_plan), TfLiteIntArrayFree);
6747 
6748   // Check for every node whether it is supported.
6749   const bool is_accelerator_specified = ShouldUseTargetDevices(
6750       delegate_options, nnapi, /*exclude_nnapi_reference=*/true);
6751   std::vector<delegate::nnapi::NNAPIValidationFailure> map_failures;
6752   // First pass through execution plan to remember mapping of FP16->FP32
6753   // dequantizations in the graph.
6754   std::vector<int> fp16_to_fp32(context->tensors_size, -1);
6755   bool should_prune_fp16_dequantize = false;
6756   for (int i = 0; i < plan->size; ++i) {
6757     const int node_id = plan->data[i];
6758     TfLiteNode* node = nullptr;
6759     TfLiteRegistration* registration = nullptr;
6760     TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
6761         context, node_id, &node, &registration));
6762     if (IsDequantizeConstFloat16(context, node, registration)) {
6763       should_prune_fp16_dequantize = true;
6764       fp16_to_fp32[node->inputs->data[0]] = node->outputs->data[0];
6765     }
6766   }
6767   if (should_prune_fp16_dequantize) {
6768     supported_nodes = GetSupportedOpsWithFp16WeightRemapping(
6769         context, target_feature_level, is_accelerator_specified,
6770         delegate_options.max_number_delegated_partitions);
6771   } else {
6772     for (int node_index : TfLiteIntArrayView(plan.get())) {
6773       TfLiteNode* node;
6774       TfLiteRegistration* registration;
6775       TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
6776           context, node_index, &node, &registration));
6777       if (NNAPIDelegateKernel::Validate(
6778               context, registration, target_feature_level, node,
6779               is_accelerator_specified, delegate_options.vendor_plugin,
6780               &map_failures)) {
6781         supported_nodes.push_back(node_index);
6782       }
6783 #ifdef NNAPI_VERBOSE_VALIDATION
6784       for (auto& failure : map_failures) {
6785         TFLITE_LOG_PROD(
6786             TFLITE_LOG_WARNING,
6787             "Operator %s (v%d) refused by NNAPI delegate: %s",
6788             tflite::EnumNameBuiltinOperator(
6789                 static_cast<BuiltinOperator>(registration->builtin_code)),
6790             registration->version, failure.message.c_str());
6791       }
6792       map_failures.clear();
6793 #endif
6794     }
6795   }
6796 
6797   // If there are no delegated nodes, short-circuit node replacement.
6798   if (supported_nodes.empty()) {
6799     return kTfLiteOk;
6800   }
6801 
6802   // NN API Delegate Registration (the pseudo kernel that will invoke the
6803   // NN API node subsets).
6804   static const TfLiteRegistration nnapi_delegate_kernel = {
6805       .init = [](TfLiteContext* context, const char* buffer,
6806                  size_t length) -> void* {
6807         const TfLiteDelegateParams* params =
6808             reinterpret_cast<const TfLiteDelegateParams*>(buffer);
6809 
6810         auto* delegate_data = static_cast<Data*>(params->delegate->data_);
6811         int* nnapi_errno = &(delegate_data->nnapi_errno);
6812 
6813         NNAPIDelegateKernel* kernel_state =
6814             delegate_data->MaybeGetCachedDelegateKernel(params);
6815         if (!kernel_state) {
6816           kernel_state = new NNAPIDelegateKernel(delegate_data->nnapi,
6817                                                  delegate_data->vendor_plugin);
6818           kernel_state->Init(context, params, nnapi_errno);
6819         }
6820 
6821         return kernel_state;
6822       },
6823 
6824       .free = [](TfLiteContext* context, void* buffer) -> void {
6825         delete reinterpret_cast<NNAPIDelegateKernel*>(buffer);
6826       },
6827 
6828       .prepare = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
6829         NNAPIDelegateKernel* state =
6830             reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
6831         int* nnapi_errno =
6832             &(static_cast<Data*>(node->delegate->data_)->nnapi_errno);
6833         return state->Prepare(context, node, nnapi_errno);
6834       },
6835 
6836       .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
6837         NNAPIDelegateKernel* state =
6838             reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
6839         int* nnapi_errno =
6840             &(static_cast<Data*>(node->delegate->data_)->nnapi_errno);
6841         return state->Invoke(context, node, nnapi_errno);
6842       },
6843 
6844       .profiling_string = nullptr,
6845       .builtin_code = kTfLiteBuiltinDelegate,
6846       .custom_name = "TfLiteNnapiDelegate",
6847       .version = 1,
6848   };
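  // Note: the TFLite runtime stores the pointer returned by .init above in
  // node->user_data of each delegate node it creates, which is how .prepare,
  // .invoke and .free recover the NNAPIDelegateKernel instance.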
6849 
6850   // Initialize caching, if applicable, from Options.
6851   const char* cache_dir = delegate_options.cache_dir;
6852   const char* model_token = delegate_options.model_token;
6853   delegates::SerializationParams params = {model_token, cache_dir};
6854   if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12 && cache_dir &&
6855       model_token) {
6856     delegate_data->cache = std::make_unique<delegates::Serialization>(params);
6857   }
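  // Sketch of how a client enables this caching path; both values below are
  // placeholders chosen only for illustration:
  //
  //   tflite::StatefulNnApiDelegate::Options options;
  //   options.cache_dir = "/data/local/tmp/nnapi_cache";  // placeholder path
  //   options.model_token = "my_model_v1";                // placeholder token
  //   tflite::StatefulNnApiDelegate nnapi_delegate(options);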
6858 
6859   delegates::Serialization* cache_ptr = delegate_data->cache.get();
6860 
6861   if (cache_ptr) {
6862     // Reuse cached delegation decision if possible.
6863     std::string accelerator_id = NnApiBackendId(delegate_options);
6864     TfLiteIntArray* cached_nodes_to_delegate = nullptr;
6865     if (delegates::GetDelegatedNodes(context, cache_ptr, accelerator_id,
6866                                      &cached_nodes_to_delegate) == kTfLiteOk) {
6867       if (cached_nodes_to_delegate->size == 0) return kTfLiteOk;
6868       auto status = context->ReplaceNodeSubsetsWithDelegateKernels(
6869           context, nnapi_delegate_kernel, cached_nodes_to_delegate, delegate);
6870       TfLiteIntArrayFree(cached_nodes_to_delegate);
6871       return status;
6872     }
6873   }
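  // No cached decision was found (or it could not be read); fall through and
  // compute the partitioning from scratch below.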
6874 
6875   std::vector<int> nodes_to_delegate;
6876 
6877   int num_partitions;
6878   TfLiteDelegateParams* params_array;
6879   if (is_accelerator_specified &&
6880       nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
6881     // Filter out nodes not supported by the target accelerators.
6882     // Supported operations cannot be queried before NNAPI 1.2.
6883     TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
6884         context, delegate, nnapi, supported_nodes, &nodes_to_delegate,
6885         &num_partitions, &params_array, nnapi_errno));
6886   } else {
6887     nodes_to_delegate = supported_nodes;
6888     auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
6889     TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
6890         context, supported_nodes_int_array.get(), &params_array,
6891         &num_partitions));
6892   }
6893 
6894   // FP16GraphPartitionHelper alters the original graph by remapping the fp32
6895   // dequantize output to the fp16 input. If the accelerator backend does not
6896   // support all the nodes of the fp16 model, we need to restore the original
6897   // graph in order for things to work.
6898   if (should_prune_fp16_dequantize &&
6899       supported_nodes.size() != nodes_to_delegate.size()) {
6900     // Restore original graph
6901     for (int execution_plan_index = 0; execution_plan_index < plan->size;
6902          ++execution_plan_index) {
6903       int node_index = plan->data[execution_plan_index];
6904       TfLiteNode* node = nullptr;
6905       TfLiteRegistration* reg = nullptr;
6906       TF_LITE_ENSURE_STATUS(
6907           context->GetNodeAndRegistration(context, node_index, &node, &reg));
6908       if (reg->builtin_code == kTfLiteBuiltinDequantize) continue;
6909 
6910       for (int i = 0; i < node->inputs->size; ++i) {
6911         const int original_input_idx = node->inputs->data[i];
6912         if (original_input_idx == kTfLiteOptionalTensor) continue;
6913         // Restore the original fp32 input (the DEQUANTIZE output).
6914         if (context->tensors[original_input_idx].type == kTfLiteFloat16 &&
6915             fp16_to_fp32[original_input_idx] != -1) {
6916           node->inputs->data[i] = fp16_to_fp32[original_input_idx];
6917         }
6918       }
6919     }
6920     // For fp16 models, only full-model delegation is allowed, so bail out.
6921     return kTfLiteOk;
6922   }
6923 
6924   TF_LITE_ENSURE_STATUS(
6925       LimitDelegatedPartitions(delegate_options.max_number_delegated_partitions,
6926                                std::vector<TfLiteDelegateParams>(
6927                                    params_array, params_array + num_partitions),
6928                                &nodes_to_delegate));
6929 
6930   auto nodes_to_delegate_int_array = BuildTfLiteIntArray(nodes_to_delegate);
6931 
6932   if (cache_ptr) {
6933     // Cache the list of nodes to be delegated for later runs.
6934     std::string accelerator_id = NnApiBackendId(delegate_options);
6935     if (delegates::SaveDelegatedNodes(context, cache_ptr, accelerator_id,
6936                                       nodes_to_delegate_int_array.get()) !=
6937         kTfLiteOk) {
6938       // Not a critical error.
6939       TF_LITE_KERNEL_LOG(context, "Could not save delegated nodes");
6940     }
6941   }
6942 
6943   if (nodes_to_delegate_int_array->size == 0) {
6944     return kTfLiteOk;
6945   } else {
6946     // Request TFLite to partition the graph and to create a new
6947     // nnapi_delegate_kernel for each independent node subset.
6948     return context->ReplaceNodeSubsetsWithDelegateKernels(
6949         context, nnapi_delegate_kernel, nodes_to_delegate_int_array.get(),
6950         delegate);
6951   }
6952 }
6953 
6954 // Returns a singleton NNAPI Delegate that can check for support of ops.
6955 TfLiteDelegate* NnApiDelegate() {
6956   static StatefulNnApiDelegate* delegate = new StatefulNnApiDelegate();
6957   return delegate;
6958 }
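// Minimal usage sketch for this legacy singleton, assuming "interpreter" is an
// already-built tflite::Interpreter:
//
//   interpreter->ModifyGraphWithDelegate(tflite::NnApiDelegate());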
6959 
6960 }  // namespace tflite
6961