1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
16
17 #include <algorithm>
18 #include <cinttypes>
19 #include <cstdarg>
20 #include <cstddef>
21 #include <cstdint>
22 #include <cstdio>
23 #include <cstring>
24 #include <functional>
25 #include <initializer_list>
26 #include <iostream>
27 #include <iterator>
28 #include <limits>
29 #include <map>
30 #include <memory>
31 #include <string>
32 #include <tuple>
33 #include <utility>
34 #include <vector>
35
36 #include "tensorflow/lite/c/c_api_types.h"
37 #include "tensorflow/lite/delegates/serialization.h"
38 #include "tensorflow/lite/logger.h"
39 #include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
40 #include "tensorflow/lite/nnapi/sl/public/NeuralNetworksSupportLibraryImpl.h"
41
42 #ifdef __ANDROID__
43 #include <sys/system_properties.h>
44 #endif
45
46 #if defined __ANDROID__ || defined __unix__
47 #define TFLITE_NNAPI_ALLOW_MMAP_SHARING
48 #include <sys/mman.h>
49 #include <unistd.h>
50 #endif
51
52 #include "fp16.h" // from @FP16
53 #include "tensorflow/lite/allocation.h"
54 #include "tensorflow/lite/builtin_op_data.h"
55 #include "tensorflow/lite/builtin_ops.h"
56 #include "tensorflow/lite/c/builtin_op_data.h"
57 #include "tensorflow/lite/c/common.h"
58 #include "tensorflow/lite/context_util.h"
59 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h"
60 #include "tensorflow/lite/delegates/nnapi/quant_lstm_sup.h"
61 #include "tensorflow/lite/delegates/utils.h"
62 #include "tensorflow/lite/kernels/internal/utils/sparsity_format_converter.h"
63 #include "tensorflow/lite/kernels/kernel_util.h"
64 #include "tensorflow/lite/minimal_logging.h"
65 #include "tensorflow/lite/nnapi/nnapi_implementation.h"
66 #include "tensorflow/lite/nnapi/nnapi_util.h"
67 #include "tensorflow/lite/util.h"
68 #ifdef NNAPI_VERBOSE_VALIDATION
69 #include "tensorflow/lite/schema/schema_generated.h"
70 #endif
71 #include "utils/hash/farmhash.h"
72
73 namespace tflite {
74 namespace {
75
76 static const char kNnapiId[] = "nnapi_";
77 constexpr uint64_t kNoMemoryTimestamp = 0;
78
79 // Returns a string ID unique to what accelerator is run by NNAPI, based on
80 // user params. Assumes that the default accelerator is same across runs.
81 // Used for caching nodes to be delegated for a model.
NnApiBackendId(const StatefulNnApiDelegate::Options & delegate_options)82 std::string NnApiBackendId(
83 const StatefulNnApiDelegate::Options& delegate_options) {
84 std::string delegate_id = kNnapiId;
85 if (delegate_options.accelerator_name) {
86 delegate_id += delegate_options.accelerator_name;
87 }
88 return delegate_id;
89 }
90
91 // Returns the enum name corresponding to the given error code if the given
92 // value corresponds to an of the error codes in the enumeration above or
93 // an message with the unknown code.
94 // LINT.IfChange(NnApiErrorDescription)
NnApiErrorDescription(int error_code)95 std::string NnApiErrorDescription(int error_code) {
96 switch (error_code) {
97 case ANEURALNETWORKS_NO_ERROR:
98 return "ANEURALNETWORKS_NO_ERROR";
99 case ANEURALNETWORKS_OUT_OF_MEMORY:
100 return "ANEURALNETWORKS_OUT_OF_MEMORY";
101 case ANEURALNETWORKS_INCOMPLETE:
102 return "ANEURALNETWORKS_INCOMPLETE";
103 case ANEURALNETWORKS_UNEXPECTED_NULL:
104 return "ANEURALNETWORKS_UNEXPECTED_NULL";
105 case ANEURALNETWORKS_BAD_DATA:
106 return "ANEURALNETWORKS_BAD_DATA";
107 case ANEURALNETWORKS_OP_FAILED:
108 return "ANEURALNETWORKS_OP_FAILED";
109 case ANEURALNETWORKS_BAD_STATE:
110 return "ANEURALNETWORKS_BAD_STATE";
111 case ANEURALNETWORKS_UNMAPPABLE:
112 return "ANEURALNETWORKS_UNMAPPABLE";
113 case ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE:
114 return "ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE";
115 case ANEURALNETWORKS_UNAVAILABLE_DEVICE:
116 return "ANEURALNETWORKS_UNAVAILABLE_DEVICE";
117 case ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT:
118 return "ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT";
119 case ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT:
120 return "ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT";
121 case ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT:
122 return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT";
123 case ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT:
124 return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT";
125 case ANEURALNETWORKS_DEAD_OBJECT:
126 return "ANEURALNETWORKS_DEAD_OBJECT";
127 default:
128 return "Unknown NNAPI error code: " + std::to_string(error_code);
129 }
130 }
131 // LINT.ThenChange()
132
133 #define RETURN_TFLITE_ERROR_IF_NN_ERROR(context, code, call_desc, p_errno) \
134 do { \
135 const auto _code = (code); \
136 const auto _call_desc = (call_desc); \
137 if (_code != ANEURALNETWORKS_NO_ERROR) { \
138 const auto error_desc = NnApiErrorDescription(_code); \
139 TF_LITE_KERNEL_LOG(context, \
140 "NN API returned error %s at line %d while %s.\n", \
141 error_desc.c_str(), __LINE__, _call_desc); \
142 *p_errno = _code; \
143 return kTfLiteError; \
144 } \
145 } while (0)
146
147 #define RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(context, code, call_desc, \
148 p_tensor, p_errno) \
149 do { \
150 const auto _code = (code); \
151 const auto _call_desc = (call_desc); \
152 if (_code != ANEURALNETWORKS_NO_ERROR) { \
153 const auto error_desc = NnApiErrorDescription(_code); \
154 TF_LITE_KERNEL_LOG(context, \
155 "NN API returned error %s at line %d while %s " \
156 "for tensor '%s'.\n", \
157 error_desc.c_str(), __LINE__, _call_desc, \
158 (p_tensor)->name ? (p_tensor)->name : "no-name"); \
159 *p_errno = _code; \
160 return kTfLiteError; \
161 } \
162 } while (0)
163
IsFloat(TfLiteType type)164 bool IsFloat(TfLiteType type) {
165 switch (type) {
166 case kTfLiteFloat32:
167 return true;
168 default:
169 return false;
170 }
171 }
172
IsFloatOrUInt8(TfLiteType type)173 bool IsFloatOrUInt8(TfLiteType type) {
174 switch (type) {
175 case kTfLiteFloat32:
176 case kTfLiteUInt8:
177 return true;
178 default:
179 return false;
180 }
181 }
182
IsQuantized(TfLiteType type)183 bool IsQuantized(TfLiteType type) {
184 switch (type) {
185 case kTfLiteUInt8:
186 case kTfLiteInt8:
187 return true;
188 default:
189 // kTfLiteInt16 isn't supported as quantized type yet.
190 return false;
191 }
192 }
193
IsInt32(TfLiteType type)194 bool IsInt32(TfLiteType type) {
195 switch (type) {
196 case kTfLiteInt32:
197 return true;
198 default:
199 return false;
200 }
201 }
202
IsFloatOrQuantized(TfLiteType type)203 bool IsFloatOrQuantized(TfLiteType type) {
204 switch (type) {
205 case kTfLiteFloat32:
206 case kTfLiteUInt8:
207 case kTfLiteInt8:
208 return true;
209 default:
210 return false;
211 }
212 }
213
IsFloatOrInt32(TfLiteType type)214 bool IsFloatOrInt32(TfLiteType type) {
215 switch (type) {
216 case kTfLiteFloat32:
217 case kTfLiteInt32:
218 return true;
219 default:
220 return false;
221 }
222 }
223
IsFloatQuantizedOrInt32(TfLiteType type)224 bool IsFloatQuantizedOrInt32(TfLiteType type) {
225 switch (type) {
226 case kTfLiteFloat32:
227 case kTfLiteUInt8:
228 case kTfLiteInt8:
229 case kTfLiteInt32:
230 return true;
231 default:
232 return false;
233 }
234 }
235
IsScalarInputSupported(int builtin_code)236 bool IsScalarInputSupported(int builtin_code) {
237 switch (builtin_code) {
238 case kTfLiteBuiltinAdd:
239 case kTfLiteBuiltinMul:
240 case kTfLiteBuiltinSub:
241 case kTfLiteBuiltinDiv:
242 case kTfLiteBuiltinEqual:
243 case kTfLiteBuiltinNotEqual:
244 case kTfLiteBuiltinGreater:
245 case kTfLiteBuiltinGreaterEqual:
246 case kTfLiteBuiltinLess:
247 case kTfLiteBuiltinLessEqual:
248 case kTfLiteBuiltinPow:
249 case kTfLiteBuiltinMaximum:
250 case kTfLiteBuiltinMinimum:
251 case kTfLiteBuiltinPrelu:
252 case kTfLiteBuiltinLeakyRelu:
253 return true;
254 default:
255 return false;
256 }
257 }
258
259 // Check if the operation requires explicit conversion from int8 to uint8
260 // values.
NeedInt8Conversion(const TfLiteContext * context,int builtin_code,const TfLiteNode * node)261 bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
262 const TfLiteNode* node) {
263 const int input_id = node->inputs->data[0];
264 const TfLiteType input_type = context->tensors[input_id].type;
265 switch (builtin_code) {
266 case kTfLiteBuiltinConv2d:
267 case kTfLiteBuiltinDepthwiseConv2d:
268 case kTfLiteBuiltinFullyConnected: {
269 if (input_type == kTfLiteInt8) {
270 const int weights_id = node->inputs->data[1];
271 const auto& weights_tensor = context->tensors[weights_id];
272 if ((weights_tensor.type == kTfLiteInt8 ||
273 weights_tensor.type == kTfLiteUInt8) &&
274 weights_tensor.quantization.type == kTfLiteAffineQuantization) {
275 return true;
276 }
277 }
278 return false;
279 }
280 case kTfLiteBuiltinTransposeConv: {
281 // Transpose convolution has a different order of inputs:
282 // 0: output_shape, 1: filter, 2: input, 3: bias.
283 const int input_id = 2;
284 const TfLiteType input_type = context->tensors[input_id].type;
285 if (input_type == kTfLiteInt8) {
286 return true;
287 }
288 return false;
289 }
290 case kTfLiteBuiltinSelect: {
291 const auto value_type = context->tensors[node->inputs->data[1]].type;
292 return value_type == kTfLiteInt8;
293 }
294 case kTfLiteBuiltinAdd:
295 case kTfLiteBuiltinArgMax:
296 case kTfLiteBuiltinArgMin:
297 case kTfLiteBuiltinAveragePool2d:
298 case kTfLiteBuiltinBatchToSpaceNd:
299 case kTfLiteBuiltinConcatenation:
300 case kTfLiteBuiltinEqual:
301 case kTfLiteBuiltinExpandDims:
302 case kTfLiteBuiltinGather:
303 case kTfLiteBuiltinGreater:
304 case kTfLiteBuiltinGreaterEqual:
305 case kTfLiteBuiltinHardSwish:
306 case kTfLiteBuiltinL2Normalization:
307 case kTfLiteBuiltinLeakyRelu:
308 case kTfLiteBuiltinLess:
309 case kTfLiteBuiltinLessEqual:
310 case kTfLiteBuiltinLogistic:
311 case kTfLiteBuiltinMaximum:
312 case kTfLiteBuiltinMaxPool2d:
313 case kTfLiteBuiltinMean:
314 case kTfLiteBuiltinMinimum:
315 case kTfLiteBuiltinMul:
316 case kTfLiteBuiltinNotEqual:
317 case kTfLiteBuiltinPad:
318 case kTfLiteBuiltinPadv2:
319 case kTfLiteBuiltinPrelu:
320 case kTfLiteBuiltinReduceMax:
321 case kTfLiteBuiltinReduceMin:
322 case kTfLiteBuiltinRelu:
323 case kTfLiteBuiltinReluN1To1:
324 case kTfLiteBuiltinRelu6:
325 case kTfLiteBuiltinResizeBilinear:
326 case kTfLiteBuiltinResizeNearestNeighbor:
327 case kTfLiteBuiltinReshape:
328 case kTfLiteBuiltinSlice:
329 case kTfLiteBuiltinSoftmax:
330 case kTfLiteBuiltinSpaceToBatchNd:
331 case kTfLiteBuiltinSpaceToDepth:
332 case kTfLiteBuiltinDepthToSpace:
333 case kTfLiteBuiltinStridedSlice:
334 case kTfLiteBuiltinSub:
335 case kTfLiteBuiltinTanh:
336 case kTfLiteBuiltinTile:
337 case kTfLiteBuiltinTopkV2:
338 case kTfLiteBuiltinTranspose: {
339 return input_type == kTfLiteInt8;
340 }
341 default:
342 return false;
343 }
344 }
345
346 constexpr int kLstmFullKernelInputSize = 24;
347 // The 20 input version is deprecated and kept only to
348 // support old model. The latest version of the LSTM Full Kernel
349 // is the one with 24 inputs
350 constexpr int kLstmFullKernelNoOptionalParamsInputSize = 20;
351 constexpr int kLstmBasicKernelInputSize = 5;
352
isLstmBasicKernel(const TfLiteNode * node)353 inline bool isLstmBasicKernel(const TfLiteNode* node) {
354 return node->inputs->size == kLstmBasicKernelInputSize;
355 }
356
isLstmFullKernel(const TfLiteNode * node)357 inline bool isLstmFullKernel(const TfLiteNode* node) {
358 return node->inputs->size == kLstmFullKernelInputSize ||
359 node->inputs->size == kLstmFullKernelNoOptionalParamsInputSize;
360 }
361
IsMeanWithDifferentInputOutputQuantization(const TfLiteContext * context,const TfLiteNode * node)362 bool IsMeanWithDifferentInputOutputQuantization(const TfLiteContext* context,
363 const TfLiteNode* node) {
364 const auto& input = context->tensors[node->inputs->data[0]];
365 const auto& output = context->tensors[node->outputs->data[0]];
366 return input.params.scale != output.params.scale ||
367 input.params.zero_point != output.params.zero_point;
368 }
369
IsBroadcastBatchMatMul(const TfLiteContext * context,const TfLiteNode * node)370 bool IsBroadcastBatchMatMul(const TfLiteContext* context,
371 const TfLiteNode* node) {
372 const auto& input0 = context->tensors[node->inputs->data[0]];
373 const auto& input1 = context->tensors[node->inputs->data[1]];
374 if (input0.dims->size != input1.dims->size) {
375 return true;
376 }
377 for (int i = 0; i < input0.dims->size - 2; i++) {
378 if (input0.dims->data[i] != input1.dims->data[i]) {
379 return true;
380 }
381 }
382 return false;
383 }
384
IsHybridOperator(const TfLiteContext * context,int builtin_code,const TfLiteNode * node)385 bool IsHybridOperator(const TfLiteContext* context, int builtin_code,
386 const TfLiteNode* node) {
387 switch (builtin_code) {
388 case kTfLiteBuiltinConv2d:
389 case kTfLiteBuiltinFullyConnected: {
390 const int input_id = node->inputs->data[0];
391 const int filter_id = node->inputs->data[1];
392 const TfLiteType input_type = context->tensors[input_id].type;
393 const TfLiteType filter_type = context->tensors[filter_id].type;
394 return IsFloat(input_type) && IsQuantized(filter_type);
395 }
396 case kTfLiteBuiltinLstm: {
397 const int input_id = node->inputs->data[0];
398 // Input #1 is optional so use #2 to determine if hybrid.
399 const int weights_id = node->inputs->data[2];
400 const TfLiteType input_type = context->tensors[input_id].type;
401 const TfLiteType weights_type = context->tensors[weights_id].type;
402 return isLstmFullKernel(node) && IsFloat(input_type) &&
403 IsQuantized(weights_type);
404 }
405 case kTfLiteBuiltinUnidirectionalSequenceLstm: {
406 const int input_id = node->inputs->data[0];
407 // Input #1 is optional so use #2 to determine if hybrid.
408 const int weights_id = node->inputs->data[2];
409 const TfLiteType input_type = context->tensors[input_id].type;
410 const TfLiteType weights_type = context->tensors[weights_id].type;
411 return IsFloat(input_type) && IsQuantized(weights_type);
412 }
413 case kTfLiteBuiltinBidirectionalSequenceLstm: {
414 const int input_id = node->inputs->data[0];
415 // Input #1 is optional so use #2 to determine if hybrid.
416 const int weights_id = node->inputs->data[2];
417 const TfLiteType input_type = context->tensors[input_id].type;
418 const TfLiteType weights_type = context->tensors[weights_id].type;
419 return IsFloat(input_type) && IsQuantized(weights_type);
420 }
421 case kTfLiteBuiltinUnidirectionalSequenceRnn: {
422 const int input_id = node->inputs->data[0];
423 const int weights_id = node->inputs->data[1];
424 const TfLiteType input_type = context->tensors[input_id].type;
425 const TfLiteType weights_type = context->tensors[weights_id].type;
426 return IsFloat(input_type) && IsQuantized(weights_type);
427 }
428 default:
429 return false;
430 }
431 }
432
IsDequantizeConstFloat16(TfLiteContext * context,const TfLiteNode * node,const TfLiteRegistration * registration)433 bool IsDequantizeConstFloat16(TfLiteContext* context, const TfLiteNode* node,
434 const TfLiteRegistration* registration) {
435 return registration->builtin_code == kTfLiteBuiltinDequantize &&
436 context->tensors[node->inputs->data[0]].type ==
437 TfLiteType::kTfLiteFloat16 &&
438 IsConstantTensor(&context->tensors[node->inputs->data[0]]);
439 }
440
IsDequantizeNonConstFloat16(TfLiteContext * context,const TfLiteNode * node,const TfLiteRegistration * registration)441 bool IsDequantizeNonConstFloat16(TfLiteContext* context, const TfLiteNode* node,
442 const TfLiteRegistration* registration) {
443 return registration->builtin_code == kTfLiteBuiltinDequantize &&
444 context->tensors[node->inputs->data[0]].type ==
445 TfLiteType::kTfLiteFloat16 &&
446 !IsConstantTensor(&context->tensors[node->inputs->data[0]]);
447 }
448
IsDensifyConstTensor(TfLiteContext * context,const TfLiteNode * node,const TfLiteRegistration * registration)449 bool IsDensifyConstTensor(TfLiteContext* context, const TfLiteNode* node,
450 const TfLiteRegistration* registration) {
451 return registration->builtin_code == kTfLiteBuiltinDensify &&
452 IsConstantTensor(&context->tensors[node->inputs->data[0]]);
453 }
454
ConvertTensorTypeToNNType(const TfLiteTensor * tensor,TfLiteType ann_type_equivalent,bool use_int8_asymm_signed)455 ANeuralNetworksOperandType ConvertTensorTypeToNNType(
456 const TfLiteTensor* tensor, TfLiteType ann_type_equivalent,
457 bool use_int8_asymm_signed) {
458 int32_t nn_type = 0;
459 float scale = 0.0f;
460 int32_t zero_point = 0;
461 switch (tensor->type) {
462 case kTfLiteFloat32:
463 nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
464 break;
465 case kTfLiteUInt8:
466 nn_type = ann_type_equivalent == kTfLiteInt32
467 ? ANEURALNETWORKS_TENSOR_INT32
468 : ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
469 scale = tensor->params.scale;
470 zero_point = tensor->params.zero_point;
471 if (scale == 0) {
472 // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
473 // with zero scale are not valid in NNAPI.
474 scale = 1;
475 }
476 break;
477 case kTfLiteInt8:
478 scale = tensor->params.scale;
479 zero_point = tensor->params.zero_point;
480 if (use_int8_asymm_signed) {
481 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
482 } else if (ann_type_equivalent == kTfLiteUInt8) {
483 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
484 zero_point += 128;
485 } else if (ann_type_equivalent == kTfLiteInt32) {
486 nn_type = ANEURALNETWORKS_TENSOR_INT32;
487 zero_point += 128;
488 } else {
489 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
490 }
491 if (scale == 0) {
492 // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
493 // with zero scale are not valid in NNAPI.
494 scale = 1;
495 }
496 break;
497 case kTfLiteInt32:
498 nn_type = ANEURALNETWORKS_TENSOR_INT32;
499 scale = tensor->params.scale;
500 zero_point = tensor->params.zero_point;
501 break;
502 case kTfLiteBool:
503 nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
504 break;
505 case kTfLiteInt16:
506 nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM;
507 scale = tensor->params.scale;
508 zero_point = tensor->params.zero_point;
509 break;
510 default:
511 break;
512 }
513 uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size);
514 uint32_t* tensor_dims = reinterpret_cast<uint32_t*>(tensor->dims->data);
515 static uint32_t scalar_rank = 1;
516 // treat scalar input as single cell tensor in NNAPI.
517 if (tensor_rank == 0) {
518 tensor_rank = scalar_rank;
519 tensor_dims = &scalar_rank;
520 }
521 ANeuralNetworksOperandType nn_operand_type{
522 .type = nn_type,
523 .dimensionCount = tensor_rank,
524 .dimensions = tensor_dims,
525 .scale = scale,
526 .zeroPoint = zero_point,
527 };
528 return nn_operand_type;
529 }
530
531 // NNAPI in API 31 hard-code the preferred alignment/padding with 64 bytes.
532 constexpr size_t kDefaultByteAlignmentForNNAPI = 64;
533
GetNumPaddingBytes(size_t byte_size)534 static size_t GetNumPaddingBytes(size_t byte_size) {
535 size_t num_padding_bytes = 0;
536 if (byte_size % kDefaultByteAlignmentForNNAPI) {
537 num_padding_bytes = kDefaultByteAlignmentForNNAPI -
538 (byte_size % kDefaultByteAlignmentForNNAPI);
539 }
540 return num_padding_bytes;
541 }
542
GetNNTensorSize(size_t tensor_size,bool allow_padding)543 static size_t GetNNTensorSize(size_t tensor_size, bool allow_padding) {
544 size_t padding_bytes = GetNumPaddingBytes(tensor_size);
545 size_t nn_tensor_size = tensor_size;
546 if (allow_padding) {
547 nn_tensor_size += padding_bytes;
548 }
549 return nn_tensor_size;
550 }
551
552 // Return NNAPI device handle with the provided null-terminated device name.
553 // Returns kTfLiteError in case of any NNAPI error and if no device with the
554 // given name can be found.
GetDeviceHandle(const NnApi * nnapi,TfLiteContext * context,const char * device_name_ptr,ANeuralNetworksDevice ** result,int * nnapi_errno)555 TfLiteStatus GetDeviceHandle(const NnApi* nnapi, TfLiteContext* context,
556 const char* device_name_ptr,
557 ANeuralNetworksDevice** result, int* nnapi_errno) {
558 if (!device_name_ptr) return kTfLiteError;
559 *result = nullptr;
560 std::string device_name(device_name_ptr);
561 uint32_t num_devices = 0;
562 nnapi->ANeuralNetworks_getDeviceCount(&num_devices);
563
564 for (uint32_t i = 0; i < num_devices; i++) {
565 ANeuralNetworksDevice* device = nullptr;
566 const char* buffer = nullptr;
567 RETURN_TFLITE_ERROR_IF_NN_ERROR(
568 context, nnapi->ANeuralNetworks_getDevice(i, &device),
569 "Searching for target device", nnapi_errno);
570
571 RETURN_TFLITE_ERROR_IF_NN_ERROR(
572 context, nnapi->ANeuralNetworksDevice_getName(device, &buffer),
573 "Searching for target device", nnapi_errno);
574
575 if (device_name == buffer) {
576 *result = device;
577 return kTfLiteOk;
578 }
579 }
580
581 TF_LITE_KERNEL_LOG(context,
582 "Could not find the specified NNAPI accelerator: %s. "
583 "Must be one of: {%s}.",
584 device_name_ptr,
585 nnapi::GetStringDeviceNamesList(nnapi).c_str());
586 return kTfLiteError;
587 }
588
589 // Compute the hash of a TfLiteIntArray.
GetHash(const TfLiteIntArray * int_array,uint64_t combine_with=0)590 uint64_t GetHash(const TfLiteIntArray* int_array, uint64_t combine_with = 0) {
591 constexpr auto kHashConst = 0x9e3779b97f4a7800ULL;
592 uint64_t result = combine_with;
593 for (auto i : TfLiteIntArrayView(int_array)) {
594 result = result ^ (i + kHashConst + (result << 10) + (result >> 4));
595 }
596 return result;
597 }
598
HasZeroes(TfLiteIntArrayView array)599 bool HasZeroes(TfLiteIntArrayView array) {
600 for (auto value : array) {
601 if (value == 0) {
602 return true;
603 }
604 }
605 return false;
606 }
607
608 // In SPLIT_V, it is legal to specify -1 in size_splits representing an unknown
609 // split size taking as many values as possible. This function computes and
610 // returns the actual value of this unknown size, or returns -1 if all split
611 // sizes are known. The caller is responsible for making sure the size_splits
612 // and axis tensor are constants.
ComputeSplitVUnknownSplitSize(const TfLiteContext * context,const TfLiteNode * node)613 int ComputeSplitVUnknownSplitSize(const TfLiteContext* context,
614 const TfLiteNode* node) {
615 const auto& input = context->tensors[node->inputs->data[0]];
616 const auto& size_splits_tensor = context->tensors[node->inputs->data[1]];
617 const auto& axis_tensor = context->tensors[node->inputs->data[2]];
618
619 const auto* size_splits = size_splits_tensor.data.i32;
620 int num_splits = size_splits_tensor.dims->data[0];
621 bool has_unknown_split_size = false;
622 int sum_of_known_split_sizes = 0;
623 for (int i = 0; i < num_splits; i++) {
624 if (size_splits[i] == -1) {
625 has_unknown_split_size = true;
626 } else {
627 sum_of_known_split_sizes += size_splits[i];
628 }
629 }
630
631 int axis = axis_tensor.data.i32[0];
632 axis = axis < 0 ? axis + input.dims->size : axis;
633 int total_size = input.dims->data[axis];
634 return has_unknown_split_size ? total_size - sum_of_known_split_sizes : -1;
635 }
636
637 // Bit mask for tensor flags.
638 enum {
639 NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
640 NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
641 NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
642 NN_TENSOR_FLAG_FORCE_PER_CHANNEL = 1U << 3,
643 NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION = 1U << 4,
644 };
645
646 // Returns the feature level to target when delegating to the given devices.
647 // The feature level is the max of the ones supported by the devices or
648 // the current NNAPI runtime feature level if no device is present.
GetTargetFeatureLevel(TfLiteContext * context,const NnApi * nnapi,const std::vector<ANeuralNetworksDevice * > & device_handles,int * target_feature_level,int * nnapi_errno)649 TfLiteStatus GetTargetFeatureLevel(
650 TfLiteContext* context, const NnApi* nnapi,
651 const std::vector<ANeuralNetworksDevice*>& device_handles,
652 int* target_feature_level, int* nnapi_errno) {
653 *target_feature_level = nnapi->nnapi_runtime_feature_level;
654 int64_t devices_feature_level = -1;
655 for (const auto* device_handle : device_handles) {
656 int64_t curr_device_feature_level;
657 RETURN_TFLITE_ERROR_IF_NN_ERROR(
658 context,
659 nnapi->ANeuralNetworksDevice_getFeatureLevel(
660 device_handle, &curr_device_feature_level),
661 "Searching for target device", nnapi_errno);
662
663 devices_feature_level =
664 std::max(curr_device_feature_level, devices_feature_level);
665 }
666
667 if ((devices_feature_level > 0) &&
668 // This second check is necessary since if the nnapi-reference device is
669 // in the list of target devices the devices_feature_level value will be
670 // 1000.
671 (devices_feature_level < nnapi->nnapi_runtime_feature_level)) {
672 TFLITE_LOG(TFLITE_LOG_INFO,
673 "Changing NNAPI Feature Level %lld to "
674 "supported by target devices: %lld",
675 nnapi->android_sdk_version, devices_feature_level);
676
677 *target_feature_level = devices_feature_level;
678 }
679
680 return kTfLiteOk;
681 }
682
683 // Returns true if this delegate is configured to use a specific set of devices.
684 // This will happen either if:
685 // - accelerator_name option has been specified
686 // - NNAPI CPU implementation has been explicitly disabled.
687 // If exclude_nnapi_reference is true this method will return false if the
688 // accelerator_name in the delegate options is equal to "nnapi-reference"
ShouldUseTargetDevices(StatefulNnApiDelegate::Options delegate_options,const NnApi * nnapi,bool exclude_nnapi_reference=false)689 bool ShouldUseTargetDevices(StatefulNnApiDelegate::Options delegate_options,
690 const NnApi* nnapi,
691 bool exclude_nnapi_reference = false) {
692 const char* device_name_ptr = delegate_options.accelerator_name;
693 std::string nnapi_cpu("nnapi-reference");
694 bool has_selected_accelerator = device_name_ptr != nullptr;
695 if (exclude_nnapi_reference && has_selected_accelerator) {
696 if (nnapi_cpu == device_name_ptr) return false;
697 }
698 return (delegate_options.disallow_nnapi_cpu &&
699 nnapi->android_sdk_version >=
700 delegate::nnapi::kMinSdkVersionForNNAPI12) ||
701 has_selected_accelerator;
702 }
703
704 // Fills the given result vector with the list of devices the given delegate
705 // is referring to.
706 // There are three possible results:
707 // - an empty array (not the full list of available accelerators,
708 // for efficiency reasons) if no accelerator is chosen and the
709 // disallow_nnapi_cpu delegate option is false.
710 // - A single element array with the target processor, if an accelerator name
711 // is specified in the delegate options.
712 // - The full list of devices available on device less the nnapi reference
713 // implementation if the delegate option disallow_nnapi_cpu has been
714 // specified.
GetTargetDevices(TfLiteContext * context,TfLiteDelegate * delegate,const NnApi * nnapi,int * nnapi_errno,std::vector<ANeuralNetworksDevice * > * result)715 TfLiteStatus GetTargetDevices(TfLiteContext* context, TfLiteDelegate* delegate,
716 const NnApi* nnapi, int* nnapi_errno,
717 std::vector<ANeuralNetworksDevice*>* result) {
718 if (nnapi->android_sdk_version < delegate::nnapi::kMinSdkVersionForNNAPI12) {
719 return kTfLiteError;
720 }
721
722 const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate);
723 const char* device_name_ptr = delegate_options.accelerator_name;
724
725 if (device_name_ptr != nullptr) {
726 // User specified an accelerator to use.
727 ANeuralNetworksDevice* nnapi_device = nullptr;
728 TF_LITE_ENSURE_STATUS(GetDeviceHandle(nnapi, context, device_name_ptr,
729 &nnapi_device, nnapi_errno));
730 result->push_back(nnapi_device);
731 } else if (delegate_options.disallow_nnapi_cpu) {
732 std::string nnapi_cpu("nnapi-reference");
733 uint32_t num_devices = 0;
734 nnapi->ANeuralNetworks_getDeviceCount(&num_devices);
735
736 for (uint32_t i = 0; i < num_devices; i++) {
737 ANeuralNetworksDevice* device = nullptr;
738 const char* buffer = nullptr;
739 RETURN_TFLITE_ERROR_IF_NN_ERROR(
740 context, nnapi->ANeuralNetworks_getDevice(i, &device),
741 "Getting list of available devices", nnapi_errno);
742 RETURN_TFLITE_ERROR_IF_NN_ERROR(
743 context, nnapi->ANeuralNetworksDevice_getName(device, &buffer),
744 "Getting list of available devices", nnapi_errno);
745 if (nnapi_cpu != buffer) {
746 result->push_back(device);
747 }
748 }
749 }
750
751 return kTfLiteOk;
752 }
753
754 // The context to be used with NnapiMappingUtilCInterface.
755 class NnapiMappingContext {
756 public:
757 // Next index of ann tensor
758 int next_ann_tensor_index_ = 0;
759 // Mapping from lite tensor index.
760 std::vector<int> lite_tensor_to_ann_tensor_;
761 // Mapping from lite index to a type which tensor must be converted to during
762 // the copying of the data to the memory allocated for NN API. kTfLiteNoType
763 // means no conversion is needed.
764 std::vector<int> index_to_type_conversion_;
765 // Mapping from lite node index.
766 std::vector<int> nnapi_to_tflite_op_mapping_;
767 };
768
769 } // namespace
770
771 namespace delegate {
772 namespace nnapi {
773
774 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
NNMemory(const NnApi * nnapi,const char * name,size_t size)775 NNMemory::NNMemory(const NnApi* nnapi, const char* name, size_t size) {
776 if (name && size > 0) {
777 nnapi_ = nnapi;
778 byte_size_ = size;
779 #ifdef __ANDROID__
780 fd_ = nnapi_->ASharedMemory_create(name, size);
781 #else
782 // For non-Android platforms ASharedMemory_create needs unique name to
783 // create a shared memory object (see nnapi_implementation.cc).
784 char shm_name_buffer[L_tmpnam];
785 if (tmpnam(shm_name_buffer) == nullptr) {
786 shm_name_buffer[0] = '\0';
787 }
788 // tmpnam will produce a string containing with slashes, but shm_open
789 // won't like that.
790 shm_region_name_ = std::string(name) + std::string(shm_name_buffer);
791 std::replace(shm_region_name_.begin(), shm_region_name_.end(), '/', '-');
792 fd_ = nnapi_->ASharedMemory_create(shm_region_name_.c_str(), size);
793 #endif
794
795 data_ptr_ = reinterpret_cast<uint8_t*>(
796 mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
797 nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE,
798 fd_, 0, &nn_memory_handle_);
799 }
800 }
801 #else
802 NNMemory::NNMemory(const NnApi* /*nnapi*/, const char* /*name*/,
803 size_t /*size*/)
804 : nnapi_(nullptr) {}
805 #endif
806
~NNMemory()807 NNMemory::~NNMemory() {
808 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
809 if (data_ptr_) {
810 munmap(data_ptr_, byte_size_);
811 }
812 if (nn_memory_handle_) {
813 nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_);
814 }
815 #ifdef __ANDROID__
816 if (fd_ >= 0) close(fd_);
817 #else
818 if (!shm_region_name_.empty()) shm_unlink(shm_region_name_.c_str());
819 #endif
820 #endif
821 }
822
823 class DequantizeMapping {
824 public:
DequantizedAnnIndex(int ann_index,TfLiteType type) const825 int DequantizedAnnIndex(int ann_index, TfLiteType type) const {
826 for (const auto& element : mapping_) {
827 if (ann_index == std::get<0>(element) && type == std::get<1>(element)) {
828 return std::get<2>(element);
829 }
830 }
831 return -1;
832 }
833
Add(int ann_index,TfLiteType type,int dequantized_ann_index)834 void Add(int ann_index, TfLiteType type, int dequantized_ann_index) {
835 // This assumes it is not already mapped.
836 mapping_.emplace_back(ann_index, type, dequantized_ann_index);
837 }
838
839 private:
840 // Each tuple specifies the ANN (quantized) tensor index, the desired
841 // floating-point type and the matching ANN (dequantized) tensor index. This
842 // could use a map but instead std::vector is used to keep code size lower.
843 std::vector<std::tuple<int, TfLiteType, int>> mapping_;
844 };
845
846 // Abstract builder for building an op in the NN API graph. This handles
847 // the disparity between TFLite and NN API operand types. NN API has singular
848 // operands for both tensors and parameters, and TFLite separates the two.
849 class NNAPIOpBuilder {
850 public:
NNAPIOpBuilder(const NnApi * nnapi,TfLiteContext * context,NnapiMappingUtilCInterface * mapping_util,DequantizeMapping * dequantize_mapping,std::map<const MMAPAllocation *,ANeuralNetworksMemory * > * allocation_mapping,ANeuralNetworksModel * nn_model,int * nnapi_errno,bool allow_dynamic_dimensions)851 NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context,
852 NnapiMappingUtilCInterface* mapping_util,
853 DequantizeMapping* dequantize_mapping,
854 std::map<const MMAPAllocation*, ANeuralNetworksMemory*>*
855 allocation_mapping,
856 ANeuralNetworksModel* nn_model, int* nnapi_errno,
857 bool allow_dynamic_dimensions)
858 : nnapi_(nnapi),
859 context_(context),
860 mapping_util_(mapping_util),
861 dequantize_mapping_(dequantize_mapping),
862 allocation_memory_mapping_(allocation_mapping),
863 nn_model_(nn_model),
864 nnapi_errno_(nnapi_errno),
865 allow_dynamic_dimensions_(allow_dynamic_dimensions) {}
866
AddScalarBoolOperand(bool value)867 TfLiteStatus AddScalarBoolOperand(bool value) {
868 return AddScalarOperand<bool>(value, ANEURALNETWORKS_BOOL);
869 }
870
AddScalarInt32Operand(int32_t value)871 TfLiteStatus AddScalarInt32Operand(int32_t value) {
872 return AddScalarOperand<int32_t>(value, ANEURALNETWORKS_INT32);
873 }
874
AddScalarFloat32Operand(float value)875 TfLiteStatus AddScalarFloat32Operand(float value) {
876 return AddScalarOperand<float>(value, ANEURALNETWORKS_FLOAT32);
877 }
878
AddVectorInt32Operand(const int32_t * values,uint32_t num_values)879 TfLiteStatus AddVectorInt32Operand(const int32_t* values,
880 uint32_t num_values) {
881 return AddVectorOperand<int32_t>(values, num_values,
882 ANEURALNETWORKS_TENSOR_INT32,
883 /*scale=*/0.f, /*zero_point=*/0);
884 }
885
AddVectorInt32Operand(const int32_t * values,uint32_t num_values,float scale,int32_t zero_point)886 TfLiteStatus AddVectorInt32Operand(const int32_t* values, uint32_t num_values,
887 float scale, int32_t zero_point) {
888 return AddVectorOperand<int32_t>(
889 values, num_values, ANEURALNETWORKS_TENSOR_INT32, scale, zero_point);
890 }
891
AddVectorInt16Operand(const int16_t * values,uint32_t num_values)892 TfLiteStatus AddVectorInt16Operand(const int16_t* values,
893 uint32_t num_values) {
894 return AddVectorOperand<int16_t>(values, num_values,
895 ANEURALNETWORKS_TENSOR_QUANT16_SYMM,
896 /*scale=*/1.f, /*zero_point=*/0);
897 }
898
AddVectorInt8Operand(const int8_t * values,uint32_t num_values)899 TfLiteStatus AddVectorInt8Operand(const int8_t* values, uint32_t num_values) {
900 return AddVectorOperand<int8_t>(values, num_values,
901 ANEURALNETWORKS_TENSOR_QUANT8_SYMM,
902 /*scale=*/1.f, /*zero_point=*/0);
903 }
904
AddVectorFloat32Operand(const float * values,uint32_t num_values)905 TfLiteStatus AddVectorFloat32Operand(const float* values,
906 uint32_t num_values) {
907 return AddVectorOperand<float>(values, num_values,
908 ANEURALNETWORKS_TENSOR_FLOAT32);
909 }
910
AddPoolingParams(void * data)911 TfLiteStatus AddPoolingParams(void* data) {
912 auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
913 AddScalarInt32Operand(builtin->padding);
914 AddScalarInt32Operand(builtin->stride_width);
915 AddScalarInt32Operand(builtin->stride_height);
916 AddScalarInt32Operand(builtin->filter_width);
917 AddScalarInt32Operand(builtin->filter_height);
918 AddScalarInt32Operand(builtin->activation);
919 return kTfLiteOk;
920 }
921
AddTensorInput(int tensor_index,bool hybrid_op,int tensor_flags=0)922 TfLiteStatus AddTensorInput(int tensor_index, bool hybrid_op,
923 int tensor_flags = 0) {
924 return AddTensor(tensor_index, hybrid_op, &augmented_inputs_, tensor_flags);
925 }
926
AddTensorOutput(int tensor_index,int tensor_flags=0)927 TfLiteStatus AddTensorOutput(int tensor_index, int tensor_flags = 0) {
928 return AddTensor(tensor_index, /*hybrid_op=*/false, &augmented_outputs_,
929 tensor_flags);
930 }
931
AddAdditionalFloat32OutputTensor(uint32_t dimension_count)932 TfLiteStatus AddAdditionalFloat32OutputTensor(uint32_t dimension_count) {
933 std::vector<uint32_t> dims(dimension_count, 0);
934 return AddFloat32OutputTensor(dimension_count, dims.data(), nullptr);
935 }
936
AddStateFloat32Tensor(int tensor_index,int * ann_tensor_index_out)937 TfLiteStatus AddStateFloat32Tensor(int tensor_index,
938 int* ann_tensor_index_out) {
939 TfLiteTensor* tensor = &context_->tensors[tensor_index];
940 return AddFloat32OutputTensor(
941 tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
942 ann_tensor_index_out);
943 }
944
AddStateInt16Tensor(int tensor_index,int * ann_tensor_index_out)945 TfLiteStatus AddStateInt16Tensor(int tensor_index,
946 int* ann_tensor_index_out) {
947 TfLiteTensor* tensor = &context_->tensors[tensor_index];
948 return AddAdditionalOutputTensor(
949 tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
950 ANEURALNETWORKS_TENSOR_QUANT16_SYMM, tensor->params.scale,
951 tensor->params.zero_point, ann_tensor_index_out);
952 }
953
AddStateInt8AsymTensor(int tensor_index,int * ann_tensor_index_out)954 TfLiteStatus AddStateInt8AsymTensor(int tensor_index,
955 int* ann_tensor_index_out) {
956 TfLiteTensor* tensor = &context_->tensors[tensor_index];
957 return AddAdditionalOutputTensor(
958 tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
959 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, tensor->params.scale,
960 tensor->params.zero_point, ann_tensor_index_out);
961 }
962
963 // Add a constant tensor with a single element, intended for broadcast capable
964 // ops.
AddSingleValueConstantTensor(float value,bool is_quantized)965 TfLiteStatus AddSingleValueConstantTensor(float value, bool is_quantized) {
966 if (!is_quantized) {
967 return AddVectorFloat32Operand(&value, 1);
968 } else {
969 // in the case that we need to add a quantized tensor, set the value to
970 // 64, zero_point to be 0 and adjust scale accordingly.
971 const uint8_t quant8_value = 64;
972 return AddVectorOperand<uint8_t>(&quant8_value, 1,
973 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
974 value / quant8_value, 0);
975 }
976 }
977
978 // Calculate the scale and zero_point for 8-bit unsigned tensor, given float
979 // min and max. zero_point is clamped to [0, 255].
CalculateQuantizationParams(float min,float max,float * scale,int * zero_point)980 TfLiteStatus CalculateQuantizationParams(float min, float max, float* scale,
981 int* zero_point) {
982 if (max < min) return kTfLiteError;
983 *scale = (max - min) / 255.f;
984 if (min > 0.f) {
985 *zero_point = 0;
986 } else if (max < 0.f) {
987 *zero_point = 255;
988 } else {
989 *zero_point = (0.f - min) / (*scale);
990 }
991 return kTfLiteOk;
992 }
993
994 // Lower hardswish according to the following equation:
995 // hard_swish[x] = x (ReLU6(x + 3)) / 6 == x * (Relu_N1_to_1(x/3) * 3 + 3) / 6
996 // = 0.5x * Relu_N1_to_1(x/3) + 0.5x
TransformHardSwishIntoSupportedOps(int lite_input_index,int lite_output_index,bool need_int8_conversion,int lite_node_index)997 TfLiteStatus TransformHardSwishIntoSupportedOps(int lite_input_index,
998 int lite_output_index,
999 bool need_int8_conversion,
1000 int lite_node_index) {
1001 const TfLiteTensor& tensor = context_->tensors[lite_input_index];
1002 float input_scale = tensor.params.scale;
1003 int input_zero_point = tensor.params.zero_point;
1004 float input_min = 0.f;
1005 float input_max = 0.f;
1006 int tensor_flags = 0;
1007 if (need_int8_conversion) {
1008 tensor_flags = tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION;
1009 input_zero_point += 128;
1010 }
1011 bool is_quantized = false;
1012 int nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1013 if (tensor.type == kTfLiteInt8 || tensor.type == kTfLiteUInt8) {
1014 is_quantized = true;
1015 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1016 input_min = (0 - input_zero_point) * input_scale;
1017 input_max = (255 - input_zero_point) * input_scale;
1018 }
1019
1020 // Stage1 : s1 = Relu1(x * 1/3)
1021 float s1_output_min = 0.f;
1022 float s1_output_max = 0.f;
1023 int s1_out_ann_index = 0;
1024 {
1025 float s1_output_scale = 0.f;
1026 int s1_output_zero_point = 0;
1027 if (is_quantized) {
1028 // clamp the output range to [-1, 1] if needed.
1029 s1_output_min = input_min / 3.f < -1.f ? -1.f : input_min / 3.f;
1030 s1_output_max = input_max / 3.f > 1.f ? 1.f : input_max / 3.f;
1031 CalculateQuantizationParams(s1_output_min, s1_output_max,
1032 &s1_output_scale, &s1_output_zero_point);
1033 }
1034 TF_LITE_ENSURE_OK(context_,
1035 AddTensorInput(lite_input_index, false, tensor_flags));
1036 const float value3f = 1.f / 3.f;
1037 TF_LITE_ENSURE_OK(context_,
1038 AddSingleValueConstantTensor(value3f, is_quantized));
1039 TF_LITE_ENSURE_OK(context_,
1040 AddScalarInt32Operand(ANEURALNETWORKS_FUSED_RELU1));
1041 TF_LITE_ENSURE_OK(
1042 context_,
1043 AddAdditionalOutputTensor(
1044 tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
1045 nn_type, s1_output_scale, s1_output_zero_point,
1046 &s1_out_ann_index));
1047 TF_LITE_ENSURE_OK(
1048 context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
1049 }
1050
1051 // Stage2 : s2 = x / 2
1052 float s2_output_min = input_min / 2.f;
1053 float s2_output_max = input_max / 2.f;
1054 int s2_out_ann_index = 0;
1055 {
1056 float s2_output_scale = input_scale / 2.0f;
1057 int s2_output_zero_point = input_zero_point;
1058 TF_LITE_ENSURE_OK(context_,
1059 AddTensorInput(lite_input_index, false, tensor_flags));
1060 const float value2f = 0.5f;
1061 TF_LITE_ENSURE_OK(context_,
1062 AddSingleValueConstantTensor(value2f, is_quantized));
1063 TF_LITE_ENSURE_OK(context_,
1064 AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
1065 TF_LITE_ENSURE_OK(
1066 context_,
1067 AddAdditionalOutputTensor(
1068 tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
1069 nn_type, s2_output_scale, s2_output_zero_point,
1070 &s2_out_ann_index));
1071 TF_LITE_ENSURE_OK(
1072 context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
1073 }
1074
1075 // Stage 3 : s3 = s1 * s2
1076 int s3_out_ann_index = 0;
1077 {
1078 augmented_inputs_.push_back(s1_out_ann_index);
1079 augmented_inputs_.push_back(s2_out_ann_index);
1080 TF_LITE_ENSURE_OK(context_,
1081 AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
1082 float s3_output_scale = 0.f;
1083 int s3_output_zero_point = 0;
1084 if (is_quantized) {
1085 // the min for stage 3 is always 0.0f.
1086 float s3_output_min = 0.f;
1087 // the max for stage 3 is max(s1_min * s2_min, s1_max * s3_max).
1088 float s3_output_max =
1089 s1_output_max * s2_output_max > s1_output_min * s2_output_min
1090 ? s1_output_max * s2_output_max
1091 : s1_output_min * s2_output_min;
1092 CalculateQuantizationParams(s3_output_min, s3_output_max,
1093 &s3_output_scale, &s3_output_zero_point);
1094 }
1095 TF_LITE_ENSURE_OK(
1096 context_,
1097 AddAdditionalOutputTensor(
1098 tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
1099 nn_type, s3_output_scale, s3_output_zero_point,
1100 &s3_out_ann_index));
1101 TF_LITE_ENSURE_OK(
1102 context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
1103 }
1104
1105 // Stage 4: y = s3 + s2
1106 {
1107 augmented_inputs_.push_back(s2_out_ann_index);
1108 augmented_inputs_.push_back(s3_out_ann_index);
1109 TF_LITE_ENSURE_OK(context_,
1110 AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
1111 TF_LITE_ENSURE_OK(context_,
1112 AddTensorOutput(lite_output_index, tensor_flags));
1113 TF_LITE_ENSURE_OK(
1114 context_, FinalizeAddOperation(ANEURALNETWORKS_ADD, lite_node_index));
1115 }
1116
1117 return kTfLiteOk;
1118 }
1119
1120 // Adds the operation to the model and maps the operation to the originating
1121 // TFLite one.
AddOperationToModel(ANeuralNetworksOperationType type,uint32_t input_count,const uint32_t * inputs,uint32_t output_count,const uint32_t * outputs,int lite_node_index)1122 TfLiteStatus AddOperationToModel(ANeuralNetworksOperationType type,
1123 uint32_t input_count, const uint32_t* inputs,
1124 uint32_t output_count,
1125 const uint32_t* outputs,
1126 int lite_node_index) {
1127 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1128 context_,
1129 nnapi_->ANeuralNetworksModel_addOperation(
1130 nn_model_, type, input_count, inputs, output_count, outputs),
1131 "adding operation", nnapi_errno_);
1132 mapping_util_->AddNnapiToTfliteOpMapping(mapping_util_, lite_node_index);
1133 return kTfLiteOk;
1134 }
1135
1136 // Adds a Dequantize operator and replaces the input tensor index with the
1137 // dequantized version. If the dequantized version of the operator already
1138 // exists then it is not added again.
AddDequantize(int nn_input_index,int lite_tensor_index,TfLiteType dequantized_type,int lite_node_index)1139 TfLiteStatus AddDequantize(int nn_input_index, int lite_tensor_index,
1140 TfLiteType dequantized_type, int lite_node_index) {
1141 const int ann_index =
1142 mapping_util_->TfLiteIndexToNnIndex(mapping_util_, lite_tensor_index);
1143 int dequantized_ann_index =
1144 dequantize_mapping_->DequantizedAnnIndex(ann_index, dequantized_type);
1145
1146 if (dequantized_ann_index == -1) {
1147 // The dequantized version does not exist yet, it has to be added: a new
1148 // Dequantize operation is added, yielding a new tensor.
1149 const TfLiteTensor& tensor = context_->tensors[lite_tensor_index];
1150 ANeuralNetworksOperandType operand_type{
1151 ANEURALNETWORKS_TENSOR_FLOAT32,
1152 static_cast<uint32_t>(tensor.dims->size),
1153 reinterpret_cast<uint32_t*>(tensor.dims->data), 0.f, 0};
1154 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1155 context_,
1156 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1157 "adding operand", nnapi_errno_);
1158 dequantized_ann_index =
1159 mapping_util_->AddNewNonTensorOperand(mapping_util_);
1160
1161 // Add Dequantize operation.
1162 const uint32_t dequantize_input[1] = {static_cast<uint32_t>(ann_index)};
1163 const uint32_t dequantize_output[1] = {
1164 static_cast<uint32_t>(dequantized_ann_index)};
1165 TF_LITE_ENSURE_OK(
1166 context_, AddOperationToModel(ANEURALNETWORKS_DEQUANTIZE,
1167 /*input_count=*/1, dequantize_input,
1168 /*output_count=*/1, dequantize_output,
1169 lite_node_index));
1170 dequantize_mapping_->Add(ann_index, dequantized_type,
1171 dequantized_ann_index);
1172 }
1173
1174 // The input for the original operation is modified so that the operation
1175 // now uses the dequantized tensor as input.
1176 augmented_inputs_[nn_input_index] = dequantized_ann_index;
1177
1178 return kTfLiteOk;
1179 }
1180
1181 // Add a RESHAPE op which reshapes an NNAPI intermediate output to the
1182 // dimensions of the TFLite output tensor.
AppendReshape(int nn_input_index,int lite_out_tensor_index,int lite_node_index)1183 TfLiteStatus AppendReshape(int nn_input_index, int lite_out_tensor_index,
1184 int lite_node_index) {
1185 augmented_inputs_.push_back(nn_input_index);
1186 auto& output_tensor = context_->tensors[lite_out_tensor_index];
1187 TF_LITE_ENSURE_STATUS(
1188 AddVectorInt32Operand(output_tensor.dims->data,
1189 static_cast<uint32_t>(output_tensor.dims->size)));
1190 TF_LITE_ENSURE_OK(context_,
1191 AddTensorOutput(lite_out_tensor_index,
1192 NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1193 TF_LITE_ENSURE_STATUS(
1194 FinalizeAddOperation(ANEURALNETWORKS_RESHAPE, lite_node_index));
1195 return kTfLiteOk;
1196 }
1197
1198 // Add a ADD op to requantize an NNAPI intermediate output to the scale and
1199 // zero point of the TFLite output tensor.
AppendRequantize(int nn_input_index,int lite_out_tensor_index,int lite_node_index,int tensor_flags=0)1200 TfLiteStatus AppendRequantize(int nn_input_index, int lite_out_tensor_index,
1201 int lite_node_index, int tensor_flags = 0) {
1202 augmented_inputs_.push_back(nn_input_index);
1203 auto& output_tensor = context_->tensors[lite_out_tensor_index];
1204
1205 // Create a zero vector with the same type as the output type. There is only
1206 // one single element in the vector, and it is broadcastable with any
1207 // tensor.
1208 TF_LITE_ENSURE(context_, IsQuantized(output_tensor.type));
1209 bool need_int8_conversion = tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
1210 int nn_type = (output_tensor.type == kTfLiteUInt8 || need_int8_conversion)
1211 ? ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
1212 : ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
1213 int8_t zero = 0;
1214 TF_LITE_ENSURE_STATUS(AddVectorOperand(&zero, /*num_values=*/1, nn_type,
1215 /*scale=*/1.0f, /*zero_point=*/0));
1216
1217 TF_LITE_ENSURE_STATUS(AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
1218 TF_LITE_ENSURE_STATUS(AddTensorOutput(lite_out_tensor_index, tensor_flags));
1219 TF_LITE_ENSURE_STATUS(
1220 FinalizeAddOperation(ANEURALNETWORKS_ADD, lite_node_index));
1221 return kTfLiteOk;
1222 }
1223
1224 // Lower PACK into CONCAT + RESHAPE when possible
TransformPackIntoSupportedOps(int lite_node_index,TfLiteNode * node,TfLiteRegistration * reg)1225 TfLiteStatus TransformPackIntoSupportedOps(int lite_node_index,
1226 TfLiteNode* node,
1227 TfLiteRegistration* reg) {
1228 // Add input tensors for CONCAT, and calculate the dimensions for the
1229 // output.
1230 int concat_output_ann_index = -1;
1231 TfLitePackParams* builtin =
1232 reinterpret_cast<TfLitePackParams*>(node->builtin_data);
1233 auto& input_tensor = context_->tensors[node->inputs->data[0]];
1234 int axis = builtin->axis < 0 ? input_tensor.dims->size + builtin->axis + 1
1235 : builtin->axis;
1236 TF_LITE_ENSURE(context_, axis < input_tensor.dims->size);
1237 uint32_t concat_dim_size = 0;
1238 for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
1239 const auto input_index = node->inputs->data[input_pos];
1240 concat_dim_size +=
1241 context_->tensors[node->inputs->data[input_pos]].dims->data[axis];
1242 TF_LITE_ENSURE_STATUS(
1243 AddTensorInput(input_index, /*hybrid_op=*/false,
1244 NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1245 }
1246 TF_LITE_ENSURE_STATUS(AddScalarInt32Operand(axis));
1247 std::vector<uint32_t> concat_output_shape(input_tensor.dims->size, 0);
1248 for (int i = 0; i < concat_output_shape.size(); i++) {
1249 if (i == axis) {
1250 concat_output_shape[i] = concat_dim_size;
1251 } else {
1252 concat_output_shape[i] = input_tensor.dims->data[i];
1253 }
1254 }
1255 TF_LITE_ENSURE_STATUS(AddIntermediateOutputTensor(
1256 input_tensor.type, concat_output_shape.size(),
1257 concat_output_shape.data(), input_tensor.params.scale,
1258 input_tensor.params.zero_point, &concat_output_ann_index));
1259 TF_LITE_ENSURE_STATUS(
1260 FinalizeAddOperation(ANEURALNETWORKS_CONCATENATION, lite_node_index));
1261
1262 // Reshape the output tensor
1263 TF_LITE_ENSURE_STATUS(AppendReshape(
1264 concat_output_ann_index, node->outputs->data[0], lite_node_index));
1265 return kTfLiteOk;
1266 }
1267
1268 // Lower UNPACK into RESHAPE + SPLIT when possible.
TransformUnpackIntoSupportedOps(int lite_node_index,TfLiteNode * node,TfLiteRegistration * reg)1269 TfLiteStatus TransformUnpackIntoSupportedOps(int lite_node_index,
1270 TfLiteNode* node,
1271 TfLiteRegistration* reg) {
1272 auto& input_tensor = context_->tensors[node->inputs->data[0]];
1273
1274 auto* builtin = reinterpret_cast<TfLiteUnpackParams*>(node->builtin_data);
1275 int axis = builtin->axis < 0 ? builtin->axis + input_tensor.dims->size
1276 : builtin->axis;
1277 TF_LITE_ENSURE(context_, axis >= 0);
1278 TF_LITE_ENSURE(context_, axis < (input_tensor.dims->size - 1));
1279 int num_splits = builtin->num;
1280 TF_LITE_ENSURE(context_, num_splits == input_tensor.dims->data[axis]);
1281 TF_LITE_ENSURE(context_, num_splits == node->outputs->size);
1282
1283 // Step 1: RESHAPE
1284 std::vector<int32_t> intermediate_shape(input_tensor.dims->size - 1);
1285 std::copy(input_tensor.dims->data, input_tensor.dims->data + axis,
1286 intermediate_shape.begin());
1287 intermediate_shape[axis] =
1288 input_tensor.dims->data[axis] * input_tensor.dims->data[axis + 1];
1289 std::copy(input_tensor.dims->data + axis + 2,
1290 input_tensor.dims->data + input_tensor.dims->size,
1291 intermediate_shape.begin() + axis + 1);
1292
1293 TF_LITE_ENSURE_STATUS(AddTensorInput(node->inputs->data[0],
1294 /*hybrid_op=*/false,
1295 NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1296 TF_LITE_ENSURE_STATUS(AddVectorInt32Operand(intermediate_shape.data(),
1297 intermediate_shape.size()));
1298 int reshape_output_ann_index = -1;
1299 float scale = input_tensor.params.scale;
1300 // Quantized tensor with zero scale is not valid in NNAPI.
1301 if (IsQuantized(input_tensor.type) && scale == 0.0f) {
1302 scale = 1.0f;
1303 }
1304 TF_LITE_ENSURE_STATUS(AddIntermediateOutputTensor(
1305 input_tensor.type, intermediate_shape.size(),
1306 reinterpret_cast<uint32_t*>(intermediate_shape.data()), scale,
1307 input_tensor.params.zero_point, &reshape_output_ann_index));
1308 TF_LITE_ENSURE_STATUS(
1309 FinalizeAddOperation(ANEURALNETWORKS_RESHAPE, lite_node_index));
1310
1311 // Step 2: SPLIT
1312 augmented_inputs_.push_back(reshape_output_ann_index);
1313 TF_LITE_ENSURE_STATUS(AddScalarInt32Operand(axis));
1314 TF_LITE_ENSURE_STATUS(AddScalarInt32Operand(num_splits));
1315 for (int i = 0; i < num_splits; i++) {
1316 int lite_output_index = node->outputs->data[i];
1317 TF_LITE_ENSURE_STATUS(AddTensorOutput(
1318 lite_output_index, NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1319 }
1320 TF_LITE_ENSURE_STATUS(
1321 FinalizeAddOperation(ANEURALNETWORKS_SPLIT, lite_node_index));
1322 return kTfLiteOk;
1323 }
1324
1325 // Lower SPLIT_V into SLICEs.
TransformSplitVIntoSupportedOps(int lite_node_index,TfLiteNode * node,TfLiteRegistration * reg)1326 TfLiteStatus TransformSplitVIntoSupportedOps(int lite_node_index,
1327 TfLiteNode* node,
1328 TfLiteRegistration* reg) {
1329 auto& input = context_->tensors[node->inputs->data[0]];
1330 int input_rank = input.dims->size;
1331
1332 const auto& size_splits_tensor = context_->tensors[node->inputs->data[1]];
1333 const auto* size_splits = size_splits_tensor.data.i32;
1334 int num_splits = size_splits_tensor.dims->data[0];
1335 int axis = context_->tensors[node->inputs->data[2]].data.i32[0];
1336 axis = axis < 0 ? axis + input_rank : axis;
1337 TF_LITE_ENSURE(context_, axis >= 0);
1338 TF_LITE_ENSURE(context_, axis < input_rank);
1339 int unknown_split_size = ComputeSplitVUnknownSplitSize(context_, node);
1340
1341 // Keep track of the start index of a slice.
1342 int slice_begin_index = 0;
1343 for (int split_index = 0; split_index < num_splits; split_index++) {
1344 int split_size = size_splits[split_index] == -1
1345 ? unknown_split_size
1346 : size_splits[split_index];
1347 TF_LITE_ENSURE(context_, split_size > 0);
1348
1349 // Parameters of SLICE.
1350 std::vector<int> begin_indices(input_rank);
1351 std::vector<int> slice_sizes(input_rank);
1352 for (int i = 0; i < input_rank; i++) {
1353 if (i == axis) {
1354 // Take only the splitted size.
1355 begin_indices[i] = slice_begin_index;
1356 slice_sizes[i] = split_size;
1357 } else {
1358 // Take the full size.
1359 begin_indices[i] = 0;
1360 slice_sizes[i] = input.dims->data[i];
1361 }
1362 }
1363 slice_begin_index += split_size;
1364
1365 // Build NNAPI SLICE inputs and output.
1366 TF_LITE_ENSURE_STATUS(AddTensorInput(
1367 node->inputs->data[0],
1368 /*hybrid_op=*/false, NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1369 TF_LITE_ENSURE_STATUS(
1370 AddVectorInt32Operand(begin_indices.data(), begin_indices.size()));
1371 TF_LITE_ENSURE_STATUS(
1372 AddVectorInt32Operand(slice_sizes.data(), slice_sizes.size()));
1373 int lite_output_index = node->outputs->data[split_index];
1374 TF_LITE_ENSURE_STATUS(AddTensorOutput(
1375 lite_output_index, NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1376
1377 TF_LITE_ENSURE_STATUS(
1378 FinalizeAddOperation(ANEURALNETWORKS_SLICE, lite_node_index));
1379 }
1380 return kTfLiteOk;
1381 }
1382
1383 // Lower SQUARED_DIFFERENCE into SUB and MUL.
TransformSquaredDifferenceIntoSupportedOps(int lite_node_index,TfLiteNode * node,TfLiteRegistration * reg)1384 TfLiteStatus TransformSquaredDifferenceIntoSupportedOps(
1385 int lite_node_index, TfLiteNode* node, TfLiteRegistration* reg) {
1386 const TfLiteTensor& lhs = context_->tensors[node->inputs->data[0]];
1387 const TfLiteTensor& output = context_->tensors[node->outputs->data[0]];
1388
1389 // Stage1 : diff = lhs - rhs
1390 int diff_out_ann_index = 0;
1391 {
1392 // For quantized data type, choose a proper scale and zero point based on
1393 // the output range.
1394 float max_output = 0.f;
1395 int diff_output_zero_point = 0;
1396 int diff_output_nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1397 switch (lhs.type) {
1398 case kTfLiteFloat32:
1399 diff_output_nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1400 break;
1401 case kTfLiteInt32:
1402 diff_output_nn_type = ANEURALNETWORKS_TENSOR_INT32;
1403 break;
1404 case kTfLiteUInt8:
1405 max_output = (255 - output.params.zero_point) * output.params.scale;
1406 diff_output_zero_point = 128;
1407 diff_output_nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1408 break;
1409 case kTfLiteInt8:
1410 max_output = (127 - output.params.zero_point) * output.params.scale;
1411 diff_output_zero_point = 0;
1412 diff_output_nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
1413 break;
1414 default:
1415 return kTfLiteError;
1416 }
1417 // Final output range: [0, max_output], and output = diff^2,
1418 // -> diff range: [-sqrt(max_output), sqrt(max_output)]
1419 // This range corresponds to [1, 255] for uint8 with zero_point = 128,
1420 // or [-127, 127] for int8 with zero_point = 0.
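      // For example (hypothetical values): a uint8 output with scale 0.5 and
      // zero_point 0 gives max_output = 255 * 0.5 = 127.5, so the diff range
      // is roughly [-11.3, 11.3] and diff_output_scale = 2 * 11.3 / 254,
      // i.e. about 0.089.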
1421 float diff_output_scale = 2.0f * std::sqrt(max_output) / 254.0f;
1422
1423 TF_LITE_ENSURE_OK(
1424 context_, AddTensorInput(node->inputs->data[0], /*hybrid_op=*/false,
1425 NN_TENSOR_FLAG_SCALAR_AS_TENSOR |
1426 NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1427 TF_LITE_ENSURE_OK(
1428 context_, AddTensorInput(node->inputs->data[1], /*hybrid_op=*/false,
1429 NN_TENSOR_FLAG_SCALAR_AS_TENSOR |
1430 NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1431 TF_LITE_ENSURE_OK(context_,
1432 AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
1433 TF_LITE_ENSURE_OK(
1434 context_,
1435 AddAdditionalOutputTensor(
1436 output.dims->size, reinterpret_cast<uint32_t*>(output.dims->data),
1437 diff_output_nn_type, diff_output_scale, diff_output_zero_point,
1438 &diff_out_ann_index));
1439 TF_LITE_ENSURE_OK(
1440 context_, FinalizeAddOperation(ANEURALNETWORKS_SUB, lite_node_index));
1441 }
1442
1443     // Stage 2: out = diff * diff
1444 {
1445 augmented_inputs_.push_back(diff_out_ann_index);
1446 augmented_inputs_.push_back(diff_out_ann_index);
1447 TF_LITE_ENSURE_OK(context_,
1448 AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
1449 TF_LITE_ENSURE_OK(context_,
1450 AddTensorOutput(node->outputs->data[0],
1451 NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1452 TF_LITE_ENSURE_OK(
1453 context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
1454 }
1455
1456 return kTfLiteOk;
1457 }
1458
1459 // Finish emitting the op (of type `type`) into the NN API.
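  // Illustrative usage sketch (not a specific call site): operands are queued
  // first and the operation is then finalized, e.g.
  //   AddTensorInput(lite_input_index, /*hybrid_op=*/false, /*tensor_flags=*/0);
  //   AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE);
  //   AddTensorOutput(lite_output_index, /*tensor_flags=*/0);
  //   FinalizeAddOperation(ANEURALNETWORKS_ADD, lite_node_index);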
1460   TfLiteStatus FinalizeAddOperation(ANeuralNetworksOperationType type,
1461 int lite_node_index) {
1462 // Actually add a NN API operation
1463 TF_LITE_ENSURE_OK(context_,
1464 AddOperationToModel(
1465 type, static_cast<uint32_t>(augmented_inputs_.size()),
1466 augmented_inputs_.data(),
1467 static_cast<uint32_t>(augmented_outputs_.size()),
1468 augmented_outputs_.data(), lite_node_index));
1469 augmented_inputs_.clear();
1470 augmented_outputs_.clear();
1471 return kTfLiteOk;
1472 }
1473
1474   TfLiteStatus AddSingleValueTensorAsScalarOperand(int tensor_index,
1475 int nn_type) {
1476 const TfLiteTensor* tensor = &context_->tensors[tensor_index];
1477 TF_LITE_ENSURE_EQ(context_, NumElements(tensor), 1);
1478
1479 ANeuralNetworksOperandType operand_type{.type = nn_type};
1480 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1481 context_,
1482 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1483 "adding operand", tensor, nnapi_errno_);
1484 int ann_tensor_index =
1485 mapping_util_->TfLiteIndexToNnIndex(mapping_util_, tensor_index);
1486 if (ann_tensor_index != -1) {
1487 augmented_inputs_.push_back(ann_tensor_index);
1488 return kTfLiteOk;
1489 }
1490 // Allocate a new tensor index
1491 ann_tensor_index =
1492 mapping_util_->AddNewNnTensorIndex(mapping_util_, tensor_index);
1493 augmented_inputs_.push_back(ann_tensor_index);
1494
1495 const TfLiteType tensor_type = tensor->type;
1496 TfLiteType nn_type_equivalent;
1497 TF_LITE_ENSURE_OK(context_, GetEquivalentToANNType(context_, nn_type,
1498 &nn_type_equivalent));
1499 if (tensor_type != nn_type_equivalent) {
1500 mapping_util_->AddTypeConversion(mapping_util_, tensor_index,
1501 nn_type_equivalent);
1502 }
1503 return kTfLiteOk;
1504 }
1505
1506 template <typename T>
1507   TfLiteStatus AddNewInputConstantTensor(
1508 int32_t nn_type, TfLiteType type, const TfLiteIntArray* dims,
1509 const std::vector<T>& tensor_value,
1510 const TfLiteQuantizationParams& quant_params, int* tensor_index) {
1511 TF_LITE_ENSURE_OK(context_,
1512 context_->AddTensors(context_, 1, tensor_index));
1513
1514 TfLiteTensor* new_tensor = &context_->tensors[*tensor_index];
1515 new_tensor->type = type;
1516 new_tensor->allocation_type = kTfLiteDynamic;
1517 new_tensor->params = quant_params;
1518
1519 // Not removing the new tensor in case of resizing errors since it will
1520 // be cleared by the context
1521 TF_LITE_ENSURE_OK(
1522 context_,
1523 context_->ResizeTensor(
1524 context_, new_tensor,
1525 // Resize Tensor takes ownership of the dims array passed as param
1526 TfLiteIntArrayCopy(dims)));
1527
1528 memcpy(new_tensor->data.raw,
1529 reinterpret_cast<const char*>(tensor_value.data()),
1530 tensor_value.size() * sizeof(T));
1531
1532 const uint32_t tensor_rank = static_cast<uint32_t>(dims->size);
1533 const uint32_t* tensor_dims = reinterpret_cast<const uint32_t*>(dims->data);
1534 ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims,
1535 quant_params.scale,
1536 quant_params.zero_point};
1537
1538 const int ann_tensor_index =
1539 mapping_util_->AddDelegateGeneratedInputAnnTensorOperand(mapping_util_);
1540
1541 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1542 context_,
1543 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1544 "adding operand", nnapi_errno_);
1545
1546 augmented_inputs_.push_back(ann_tensor_index);
1547
1548 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1549 context_,
1550 nnapi_->ANeuralNetworksModel_setOperandValue(
1551 nn_model_, ann_tensor_index, new_tensor->data.raw,
1552 new_tensor->bytes),
1553 "setting new operand value", nnapi_errno_);
1554
1555 return kTfLiteOk;
1556 }
1557
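  // Illustrative usage sketch (hypothetical values) for the overload below:
  // adding a constant int32 vector of four zeros as an extra NNAPI input:
  //   int tensor_index = -1;
  //   AddNewInputConstantTensor<int32_t>(
  //       ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {4},
  //       std::vector<int32_t>(4, 0), TfLiteQuantizationParams{},
  //       &tensor_index);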
1558 template <typename T>
1559   TfLiteStatus AddNewInputConstantTensor(
1560 int32_t nn_type, TfLiteType type, std::initializer_list<int> dims,
1561 const std::vector<T>& tensor_value,
1562 const TfLiteQuantizationParams& quant_params, int* tensor_index) {
1563 TfLiteIntArray* dim_array = TfLiteIntArrayCreate(dims.size());
1564 dim_array->size = dims.size();
1565 std::copy(dims.begin(), dims.end(), dim_array->data);
1566
1567 const auto result = AddNewInputConstantTensor(
1568 nn_type, type, dim_array, tensor_value, quant_params, tensor_index);
1569 TfLiteIntArrayFree(dim_array);
1570 return result;
1571 }
1572
1573   TfLiteStatus AddIntermediateOutputTensor(TfLiteType tfl_type,
1574 uint32_t dimension_count,
1575 const uint32_t* dimension_data,
1576 float scale, int32_t zero_point,
1577 int* ann_index_out,
1578 bool need_int8_conversion = false) {
1579 int32_t nn_type;
1580 switch (tfl_type) {
1581 case kTfLiteFloat32:
1582 nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1583 break;
1584 case kTfLiteInt8:
1585 nn_type = need_int8_conversion
1586 ? ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
1587 : ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
1588 break;
1589 case kTfLiteUInt8:
1590 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1591 break;
1592 default:
1593 return kTfLiteError;
1594 }
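    // A converted int8 tensor is represented as asymmetric uint8, whose zero
    // point is shifted by 128; e.g. an int8 zero_point of -5 becomes 123.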
1595 if (need_int8_conversion) {
1596 zero_point += 128;
1597 }
1598 TF_LITE_ENSURE_STATUS(
1599 AddAdditionalOutputTensor(dimension_count, dimension_data, nn_type,
1600 scale, zero_point, ann_index_out));
1601 return kTfLiteOk;
1602 }
1603
1604   void ClearInputOuputLists() {
1605 augmented_inputs_.clear();
1606 augmented_outputs_.clear();
1607 }
1608
1609 private:
1610 // Returns a TF Lite type which has the same memory representation as a
1611 // provided NN API type.
1612   TfLiteStatus GetEquivalentToANNType(TfLiteContext* context, int nn_type,
1613 TfLiteType* type) {
1614 switch (nn_type) {
1615 case ANEURALNETWORKS_INT32:
1616 *type = kTfLiteInt32;
1617 return kTfLiteOk;
1618 case ANEURALNETWORKS_FLOAT32:
1619 *type = kTfLiteFloat32;
1620 return kTfLiteOk;
1621 default:
1622 TF_LITE_KERNEL_LOG(context,
1623 "NN API Delegate: Can't get an equivalent TF Lite "
1624 "type for provided NN API type: %d.\n",
1625 nn_type);
1626 return kTfLiteError;
1627 }
1628 }
1629
1630 template <typename T>
1631   TfLiteStatus AddScalarOperand(T value, int32_t nn_type) {
1632 ANeuralNetworksOperandType operand_type{.type = nn_type};
1633 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1634 context_,
1635 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1636 "adding operand", nnapi_errno_);
1637 const int ann_index = mapping_util_->AddNewNonTensorOperand(mapping_util_);
1638 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1639 context_,
1640 nnapi_->ANeuralNetworksModel_setOperandValue(nn_model_, ann_index,
1641 &value, sizeof(T)),
1642 "setting new operand value", nnapi_errno_);
1643 augmented_inputs_.push_back(ann_index);
1644 return kTfLiteOk;
1645 }
1646
1647 template <typename T>
1648   TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
1649 int32_t nn_type, float scale,
1650 int32_t zero_point) {
1651 ANeuralNetworksOperandType operand_type{.type = nn_type,
1652 .dimensionCount = 1,
1653 .dimensions = &num_values,
1654 .scale = scale,
1655 .zeroPoint = zero_point};
1656
1657 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1658 context_,
1659 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1660 "adding operand", nnapi_errno_);
1661
1662 const int ann_index = mapping_util_->AddNewNonTensorOperand(mapping_util_);
1663 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1664 context_,
1665 nnapi_->ANeuralNetworksModel_setOperandValue(
1666 nn_model_, ann_index, values, sizeof(T) * num_values),
1667         "setting new operand value", nnapi_errno_);
1668 augmented_inputs_.push_back(ann_index);
1669 return kTfLiteOk;
1670 }
1671
1672 template <typename T>
1673   TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
1674 int32_t nn_type) {
1675 return AddVectorOperand(values, num_values, nn_type, /*scale=*/0.f,
1676 /*zero_point=*/0);
1677 }
1678
1679   TfLiteStatus AddFloat32OutputTensor(uint32_t dimension_count,
1680 const uint32_t* dimension_data,
1681 int* ann_index_out) {
1682 return AddAdditionalOutputTensor(
1683 dimension_count, dimension_data, ANEURALNETWORKS_TENSOR_FLOAT32,
1684 /*scale=*/0.f, /*zero_point=*/0, ann_index_out);
1685 }
1686
1687   TfLiteStatus AddAdditionalOutputTensor(uint32_t dimension_count,
1688 const uint32_t* dimension_data,
1689 int32_t nn_type, float scale,
1690 int32_t zero_point,
1691 int* ann_index_out) {
1692 ANeuralNetworksOperandType operand_type{
1693 .type = nn_type,
1694 .dimensionCount = dimension_count,
1695 .dimensions = dimension_data,
1696 .scale = scale,
1697 .zeroPoint = zero_point,
1698 };
1699 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1700 context_,
1701 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1702 "adding operand", nnapi_errno_);
1703 const int ann_index = mapping_util_->AddNewNonTensorOperand(mapping_util_);
1704 augmented_outputs_.push_back(ann_index);
1705 if (ann_index_out) *ann_index_out = ann_index;
1706 return kTfLiteOk;
1707 }
1708
1709 // Adds a new NN API tensor that shadows the TF Lite tensor `tensor_index`.
1710 // The NN API tensor index of the created (or reused) tensor is appended to
1711 // `indices`. If another caller previously created a NN API tensor for
1712 // `tensor_index` then the existing one is reused.
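  // Illustrative call sketch (hypothetical flags): a signed quantized scalar
  // input could be registered with
  //   AddTensor(tensor_index, /*hybrid_op=*/false, &augmented_inputs_,
  //             NN_TENSOR_FLAG_SCALAR_AS_TENSOR |
  //                 NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED);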
1713   TfLiteStatus AddTensor(int tensor_index, bool hybrid_op,
1714 std::vector<uint32_t>* indices, int tensor_flags = 0) {
1715 const bool scalar_as_tensor =
1716 tensor_flags & NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
1717 const bool need_int8_conversion =
1718 tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
1719 const bool use_int8_asymm_signed =
1720 tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
1721 const bool force_per_channel =
1722 tensor_flags & NN_TENSOR_FLAG_FORCE_PER_CHANNEL;
1723 const bool need_half2float_conversion =
1724 tensor_flags & NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION;
1725
1726 int ann_tensor_index =
1727 mapping_util_->TfLiteIndexToNnIndex(mapping_util_, tensor_index);
1728 if (ann_tensor_index != -1) {
1729 indices->push_back(ann_tensor_index);
1730 return kTfLiteOk;
1731 }
1732 // Allocate a new tensor index
1733 ann_tensor_index =
1734 mapping_util_->AddNewNnTensorIndex(mapping_util_, tensor_index);
1735
1736 // Parameters needed for new type.
1737 int32_t nn_type = 0;
1738 float scale = 0.0f;
1739 int32_t zeroPoint = 0;
1740 ANeuralNetworksSymmPerChannelQuantParams ann_perchannel_params;
1741 TfLiteTensor* tensor = &context_->tensors[tensor_index];
1742 TfLiteType tensor_type = tensor->type;
1743 if (hybrid_op && (tensor_type == kTfLiteUInt8)) {
1744       // For legacy reasons, UINT8 weights in hybrid operators are actually INT8
1745 // values and should be interpreted as such.
1746 tensor_type = kTfLiteInt8;
1747 }
1748 switch (tensor_type) {
1749 case kTfLiteNoType:
1750 // Tensors added during initialization of Ops don't have a type yet and
1751 // should not be registered with the NNAPI.
1752 indices->push_back(-1);
1753 return kTfLiteOk;
1754 case kTfLiteFloat32:
1755 nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1756 break;
1757 case kTfLiteFloat16:
1758 nn_type = ANEURALNETWORKS_TENSOR_FLOAT16;
1759 if (need_half2float_conversion) {
1760 nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1761 mapping_util_->AddTypeConversion(mapping_util_, tensor_index,
1762 kTfLiteFloat32);
1763 }
1764 break;
1765 case kTfLiteUInt8:
1766 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1767 scale = tensor->params.scale;
1768 zeroPoint = tensor->params.zero_point;
1769 if (scale == 0) {
1770 // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with zero scale is not valid in
1771 // NNAPI.
1772 scale = 1;
1773 }
1774 break;
1775 case kTfLiteInt8:
1776 // If explicit int8 conversion is needed, we still need
1777 // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type.
1778 if (use_int8_asymm_signed) {
1779 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
1780 } else if (need_int8_conversion) {
1781 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1782 } else {
1783 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
1784 }
1785 scale = tensor->params.scale;
1786 zeroPoint = tensor->params.zero_point;
1787 if (tensor->quantization.type == kTfLiteAffineQuantization) {
1788 TfLiteAffineQuantization* quantization_params =
1789 static_cast<TfLiteAffineQuantization*>(
1790 tensor->quantization.params);
1791 if (quantization_params->scale->size > 1 || force_per_channel) {
1792 // Set up per-channel quantization.
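          // For example (hypothetical layout): a conv filter of shape
          // [out_channels, h, w, in_channels] quantized along dimension 0
          // has quantized_dimension == 0 and one scale per output channel.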
1793 ann_perchannel_params = {
1794 .channelDim = static_cast<uint32_t>(
1795 quantization_params->quantized_dimension),
1796 .scaleCount =
1797 static_cast<uint32_t>(quantization_params->scale->size),
1798 .scales = quantization_params->scale->data,
1799 };
1800 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL;
1801 scale = 0.0f;
1802 zeroPoint = 0;
1803 } else if (quantization_params->scale->size == 1) {
1804 scale = quantization_params->scale->data[0];
1805 zeroPoint = quantization_params->zero_point->data[0];
1806 }
1807 }
1808 if (nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1809 if (need_int8_conversion) {
1810 zeroPoint += 128;
1811 mapping_util_->AddTypeConversion(mapping_util_, tensor_index,
1812 kTfLiteUInt8);
1813 }
1814 if (scale == 0) {
1815 // QUANT8 tensors with zero scale are not valid in NNAPI.
1816 scale = 1;
1817 }
1818 }
1819 break;
1820 case kTfLiteInt32:
1821 nn_type = ANEURALNETWORKS_TENSOR_INT32;
1822 scale = tensor->params.scale;
1823 zeroPoint = tensor->params.zero_point;
1824 break;
1825 case kTfLiteBool:
1826 nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
1827 break;
1828 case kTfLiteInt16:
1829 nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM;
1830 scale = tensor->params.scale;
1831 zeroPoint = tensor->params.zero_point;
1832 break;
1833 default:
1834 context_->ReportError(
1835 context_, "Failed to add NN API tensor: type %s is not supported.",
1836 TfLiteTypeGetName(tensor_type));
1837 return kTfLiteError;
1838 }
1839 bool has_unspecified_dimensions = ::tflite::HasUnspecifiedDimension(tensor);
1840 uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size);
1841 std::vector<uint32_t> dims_unspecified(tensor_rank, 0);
1842 if (has_unspecified_dimensions) {
1843 for (int i = 0; i < tensor->dims_signature->size; i++) {
1844 dims_unspecified[i] = tensor->dims_signature->data[i] == -1
1845 ? 0
1846 : tensor->dims_signature->data[i];
1847 }
1848 }
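    // For example (hypothetical shape): dims_signature [-1, 224, 224, 3] with
    // current dims [1, 224, 224, 3] is registered as NNAPI dimensions
    // {0, 224, 224, 3} when dynamic dimensions are allowed.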
1849 uint32_t* tensor_dims =
1850 has_unspecified_dimensions && allow_dynamic_dimensions_
1851 ? dims_unspecified.data()
1852 : reinterpret_cast<uint32_t*>(tensor->dims->data);
1853 if (scalar_as_tensor && tensor_rank == 0) {
1854 // Use rank 1, shape {1} operand for TFLite scalar tensors.
1855 tensor_rank = 1;
1856 tensor_dims = &tensor_rank;
1857 }
1858 if (tensor_rank == 0) {
1859       // If the tensor rank is 0, the dimensions pointer must be nullptr.
1860 tensor_dims = nullptr;
1861 }
1862
1863 ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims,
1864 scale, zeroPoint};
1865 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1866 context_,
1867 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1868 "adding operand", tensor, nnapi_errno_);
1869
1870 if (nn_type == ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1871 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1872 context_,
1873 nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
1874 nn_model_, ann_tensor_index, &ann_perchannel_params),
1875 "setting new operand per channel quantization params", tensor,
1876 nnapi_errno_);
1877 }
1878 if (tensor->allocation_type == kTfLiteMmapRo) {
1879 if (IsQuantized(tensor_type) && need_int8_conversion &&
1880 nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1881 // We need to add a tensor and convert the weights into uint8.
1882 // Currently this is only needed for fully_connected. The new_tensor is
1883 // needed for lifetime management for the converted weights.
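        // The conversion below offsets every value by 128, so int8 -128, 0 and
        // 127 map to uint8 0, 128 and 255 respectively.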
1884 int new_tensor_index = -1;
1885 TF_LITE_ENSURE_OK(context_,
1886 context_->AddTensors(context_, 1, &new_tensor_index));
1887 TfLiteTensor* new_tensor = &context_->tensors[new_tensor_index];
1888 new_tensor->type = kTfLiteUInt8;
1889 new_tensor->allocation_type = kTfLiteDynamic;
1890 new_tensor->params.scale = scale;
1891 new_tensor->params.zero_point = zeroPoint;
1892 // Not removing the new tensor in case of resizing errors since it will
1893 // be cleared by the context
1894 TF_LITE_ENSURE_OK(
1895 context_, context_->ResizeTensor(context_, new_tensor,
1896 // Resize Tensor takes ownership of
1897 // the dims array passed as param
1898 TfLiteIntArrayCopy(tensor->dims)));
1899         // Convert the int8 values into the corresponding uint8 values.
1900 const auto num_elements = NumElements(tensor);
1901 for (int i = 0; i < num_elements; ++i) {
1902 new_tensor->data.uint8[i] = static_cast<const uint8_t>(
1903 static_cast<int32_t>(tensor->data.int8[i]) + 128);
1904 }
1905 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1906 context_,
1907 nnapi_->ANeuralNetworksModel_setOperandValue(
1908 nn_model_, ann_tensor_index, new_tensor->data.raw,
1909 new_tensor->bytes),
1910 "setting new operand value", tensor, nnapi_errno_);
1911 } else if (tensor_type == kTfLiteFloat16 && need_half2float_conversion) {
1912 // We need to convert the constant fp16 weights to fp32. The new_tensor
1913 // is needed for lifetime management for the converted weights.
1914 int new_tensor_index = -1;
1915 TF_LITE_ENSURE_OK(context_,
1916 context_->AddTensors(context_, 1, &new_tensor_index));
1917 TfLiteTensor* new_tensor = &context_->tensors[new_tensor_index];
1918 new_tensor->type = kTfLiteFloat32;
1919 new_tensor->allocation_type = kTfLiteDynamic;
1920 // Not removing the new tensor in case of resizing errors since it will
1921 // be cleared by the context
1922 TF_LITE_ENSURE_OK(
1923 context_, context_->ResizeTensor(context_, new_tensor,
1924 // Resize Tensor takes ownership of
1925 // the dims array passed as param
1926 TfLiteIntArrayCopy(tensor->dims)));
1927         // Convert the fp16 values into the corresponding fp32 values.
1928 const auto num_elements = NumElements(tensor);
1929 for (int i = 0; i < num_elements; ++i) {
1930 new_tensor->data.f[i] = fp16_ieee_to_fp32_value(
1931 reinterpret_cast<uint16_t*>(tensor->data.data)[i]);
1932 }
1933 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1934 context_,
1935 nnapi_->ANeuralNetworksModel_setOperandValue(
1936 nn_model_, ann_tensor_index, new_tensor->data.data,
1937 new_tensor->bytes),
1938 "setting new operand value", tensor, nnapi_errno_);
1939 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
1940 } else if (tensor->allocation &&
1941 static_cast<const Allocation*>(tensor->allocation)->type() ==
1942 Allocation::Type::kMMap) {
1943 const MMAPAllocation* mmap_alloc =
1944 static_cast<const MMAPAllocation*>(tensor->allocation);
1945 if (allocation_memory_mapping_->count(mmap_alloc) == 0) {
1946 ANeuralNetworksMemory* ann_memory_handle = nullptr;
1947 nnapi_->ANeuralNetworksMemory_createFromFd(
1948 mmap_alloc->bytes(), PROT_READ, mmap_alloc->fd(), 0,
1949 &ann_memory_handle);
1950 allocation_memory_mapping_->insert(
1951 std::make_pair(mmap_alloc, ann_memory_handle));
1952 }
1953 ANeuralNetworksMemory* ann_memory_handle =
1954 allocation_memory_mapping_->at(mmap_alloc);
1955 // Compute the offset to the base pointer of the MMAPAllocation.
1956 auto offset = reinterpret_cast<const uint8_t*>(tensor->data.raw) -
1957 reinterpret_cast<const uint8_t*>(mmap_alloc->base());
1958 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1959 context_,
1960 nnapi_->ANeuralNetworksModel_setOperandValueFromMemory(
1961 nn_model_, ann_tensor_index, ann_memory_handle, offset,
1962 tensor->bytes),
1963 "setting new operand value from memory", tensor, nnapi_errno_);
1964 #endif
1965 } else {
1966 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1967 context_,
1968 nnapi_->ANeuralNetworksModel_setOperandValue(
1969 nn_model_, ann_tensor_index, tensor->data.data, tensor->bytes),
1970 "setting new operand value", tensor, nnapi_errno_);
1971 }
1972 }
1973 indices->push_back(ann_tensor_index);
1974 return kTfLiteOk;
1975 }
1976
1977 // Access to NNAPI.
1978 const NnApi* const nnapi_;
1979
1980 // TfLiteContext for error handling.
1981 TfLiteContext* const context_;
1982
1983 // Tracks relationship between indices.
1984 NnapiMappingUtilCInterface* const mapping_util_;
1985
1986 // Keeps mapping of ANN quantized tensor and float data type to equivalent
1987 // dequantized ANN tensor. For example, tensor #4 (UINT8) + FLOAT32 could map
1988 // to tensor #10 (FLOAT32) because a DEQUANTIZE operator was added to convert
1989 // tensor #4 to a FLOAT32 tensor.
1990 DequantizeMapping* const dequantize_mapping_;
1991
1992 std::map<const MMAPAllocation*, ANeuralNetworksMemory*>* const
1993 allocation_memory_mapping_;
1994
1995 // The NNAPI model.
1996 ANeuralNetworksModel* const nn_model_;
1997
1998 // Inputs and outputs for the current op. These are augmented in the sense
1999 // that NN API uses operands for all arguments, not just tensors, unlike
2000 // TensorFlow Lite.
2001 std::vector<uint32_t> augmented_inputs_;
2002 std::vector<uint32_t> augmented_outputs_;
2003
2004 // Return status code of the latest NNAPI call.
2005 int* nnapi_errno_;
2006
2007   // Whether to allow dynamic dimensions without re-compilation.
2008 bool allow_dynamic_dimensions_;
2009 };
2010
2011 namespace {
2012 struct OpValidationContext {
2013 bool is_valid;
2014 std::vector<NNAPIValidationFailure>* validation_failures;
2015 };
2016
2017 #define EXPECT_INPUT_TYPE_IN(actual_type, ...) \
2018 ExpectTypeIn(actual_type, {__VA_ARGS__}, \
2019 NNAPIValidationFailureType::kUnsupportedInputType, \
2020 "Input type not in expected list " #__VA_ARGS__, &val_ctx)
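// Usage sketch (illustrative): the macro expects a local OpValidationContext
// named `val_ctx` to be in scope, e.g.
//   EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8, kTfLiteInt8);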
2021
2022 inline void AddValidationFailure(NNAPIValidationFailureType failure_type,
2023 const char* message,
2024 OpValidationContext* val_ctx) {
2025 val_ctx->is_valid = false;
2026
2027 #ifdef NNAPI_VERBOSE_VALIDATION
2028 if (val_ctx->validation_failures) {
2029 val_ctx->validation_failures->push_back({failure_type, message});
2030 }
2031 #endif
2032 }
2033
2034 template <typename... Args>
2035 inline void AddValidationFailureFmt(OpValidationContext* val_ctx,
2036 NNAPIValidationFailureType failure_type,
2037 const char* message_fmt, Args... args) {
2038 val_ctx->is_valid = false;
2039 #ifdef NNAPI_VERBOSE_VALIDATION
2040 if (val_ctx->validation_failures) {
2041 size_t req_buf_size = snprintf(nullptr, 0, message_fmt, args...) + 1;
2042 std::unique_ptr<char[]> tmp_buf(new char[req_buf_size]);
2043 snprintf(tmp_buf.get(), req_buf_size, message_fmt, args...);
2044
2045 val_ctx->validation_failures->push_back({failure_type, tmp_buf.get()});
2046 }
2047 #endif
2048 }
2049
2050 inline bool Expect(bool condition, NNAPIValidationFailureType failure_type,
2051 const char* message, OpValidationContext* val_ctx) {
2052 if (!condition) {
2053 AddValidationFailure(failure_type, message, val_ctx);
2054 return false;
2055 }
2056 return true;
2057 }
2058
2059 template <typename... Args>
2060 inline bool ExpectFmt(bool condition, OpValidationContext* val_ctx,
2061 NNAPIValidationFailureType failure_type,
2062 const char* message_fmt, Args... args) {
2063 if (!condition) {
2064 AddValidationFailureFmt(val_ctx, failure_type, message_fmt, args...);
2065 return false;
2066 }
2067 return true;
2068 }
2069
2070 inline bool ExpectTypeIn(TfLiteType actual_type,
2071 std::initializer_list<TfLiteType> allowed_types,
2072 NNAPIValidationFailureType failure_type,
2073 const char* msg, OpValidationContext* val_ctx) {
2074 return Expect(std::find(allowed_types.begin(), allowed_types.end(),
2075 actual_type) != allowed_types.end(),
2076 failure_type, msg, val_ctx);
2077 }
2078
2079 inline bool ExpectMinAndroidSdkVersion(int curr_version, int min_version,
2080 OpValidationContext* val_ctx) {
2081 return ExpectFmt(curr_version >= min_version, val_ctx,
2082 NNAPIValidationFailureType::kUnsupportedAndroidVersion,
2083 "Android sdk version less than %d", min_version);
2084 }
2085
2086 inline bool ExpectMaxOpVersion(int curr_version, int max_version,
2087 OpValidationContext* val_ctx) {
2088 return ExpectFmt(curr_version <= max_version, val_ctx,
2089 NNAPIValidationFailureType::kUnsupportedOperatorVersion,
2090 "OP Version higher than %d", max_version);
2091 }
2092
2093 inline bool ExpectOpVersion(int curr_version, int max_version,
2094 OpValidationContext* val_ctx) {
2095 return ExpectFmt(curr_version <= max_version, val_ctx,
2096 NNAPIValidationFailureType::kUnsupportedOperatorVersion,
2097 "OP Version different from %d", max_version);
2098 }
2099
2100 inline bool ExpectIsFloatOperator(const TfLiteContext* context,
2101 const TfLiteNode* node,
2102 OpValidationContext* val_ctx) {
2103 const auto input_type = context->tensors[node->inputs->data[0]].type;
2104 return Expect(IsFloat(input_type),
2105 NNAPIValidationFailureType::kUnsupportedInputType,
2106 "Input should be Float", val_ctx);
2107 }
2108
2109 bool ExpectIsFloatOrUint8Operator(const TfLiteContext* context,
2110 const TfLiteNode* node,
2111 OpValidationContext* val_ctx) {
2112 const auto input_type = context->tensors[node->inputs->data[0]].type;
2113 return Expect(IsFloatOrUInt8(input_type),
2114 NNAPIValidationFailureType::kUnsupportedInputType,
2115 "Input should be Float or UINT8", val_ctx);
2116 }
2117
2118 bool ExpectIsFloatOrQuant8Operator(const TfLiteContext* context,
2119 const TfLiteNode* node,
2120 OpValidationContext* val_ctx) {
2121 const auto input_type = context->tensors[node->inputs->data[0]].type;
2122 return Expect(IsFloatOrQuantized(input_type),
2123 NNAPIValidationFailureType::kUnsupportedInputType,
2124 "Input should be Float or Quant8", val_ctx);
2125 }
2126
2127 bool ExpectIsFloatOrInt32Operator(const TfLiteContext* context,
2128 const TfLiteNode* node,
2129 OpValidationContext* val_ctx) {
2130 const auto input_type = context->tensors[node->inputs->data[0]].type;
2131 return Expect(IsFloatOrInt32(input_type),
2132 NNAPIValidationFailureType::kUnsupportedInputType,
2133 "Input should be Float or Int32", val_ctx);
2134 }
2135
2136 bool ExpectIsFloatQuant8OrInt32Operator(const TfLiteContext* context,
2137 const TfLiteNode* node,
2138 OpValidationContext* val_ctx) {
2139 const auto input_type = context->tensors[node->inputs->data[0]].type;
2140 return Expect(IsFloatQuantizedOrInt32(input_type),
2141 NNAPIValidationFailureType::kUnsupportedInputType,
2142 "Input should be Float, Quant8, or Int32", val_ctx);
2143 }
2144
2145 // When using NN API version 1.0 or 1.1, the condition below must be true for
2146 // quantized versions of the following ops:
2147 // * CONV_2D
2148 // * DEPTHWISE_CONV_2D
2149 // * FULLY_CONNECTED (where filter actually stands for weights)
2150 // The condition is relaxed and no longer required since version 1.2.
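// For example (hypothetical scales): input_scale = 0.5 and filter_scale = 0.01
// give a product of 0.005, so the op is accepted only if output_scale is
// greater than 0.005.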
2151 bool ExpectIsRestrictedScalesCompliant(const TfLiteContext* context,
2152 const TfLiteNode* node,
2153 OpValidationContext* val_ctx) {
2154 const int input_id = node->inputs->data[0];
2155 const int filter_id = node->inputs->data[1];
2156 const int output_id = node->outputs->data[0];
2157 const float input_scale = context->tensors[input_id].params.scale;
2158 const float filter_scale = context->tensors[filter_id].params.scale;
2159 const float output_scale = context->tensors[output_id].params.scale;
2160 return Expect(input_scale * filter_scale < output_scale,
2161 NNAPIValidationFailureType::kNotRestrictedScaleCompliant,
2162                 "When using NN API version 1.0 or 1.1, input_scale * "
2163                 "filter_scale must be less than output_scale.",
2164 val_ctx);
2165 }
2166
2167 void AppendDynamicDimensions(const TfLiteContext* context,
2168 const TfLiteIntArray* tensor_indices,
2169 std::vector<int>& dynamic_dimensions) {
2170 for (int i : TfLiteIntArrayView(tensor_indices)) {
2171 if (i == kTfLiteOptionalTensor) continue;
2172 const auto& tensor = context->tensors[i];
2173 if (tensor.dims_signature) {
2174 for (int i = 0; i < tensor.dims_signature->size; i++) {
2175 if (tensor.dims_signature->data[i] == -1) {
2176 dynamic_dimensions.push_back(tensor.dims->data[i]);
2177 }
2178 }
2179 }
2180 }
2181 }
2182
2183 NNAPIExecutionCache::Signature CreateExecutionCacheSignature(
2184 const TfLiteContext* context, const TfLiteNode* node,
2185 const StatefulNnApiDelegate::Options& delegate_options,
2186 const std::vector<StatefulNnApiDelegate::MemoryRegistration>&
2187 tensor_memory_map) {
2188 // Tensor buffer handle timestamps.
2189 std::vector<uint64_t> tensor_handle_timestamps(context->tensors_size);
2190 for (int i = 0; i < tensor_handle_timestamps.size(); i++) {
2191 auto handle = context->tensors[i].buffer_handle;
2192 if (handle < 0 || handle >= tensor_memory_map.size()) {
2193 tensor_handle_timestamps[i] = kNoMemoryTimestamp;
2194 } else {
2195 tensor_handle_timestamps[i] = tensor_memory_map[handle].timestamp;
2196 }
2197 }
2198
2199 // Dynamic dimensions.
2200 std::vector<int> dynamic_dimensions;
2201 if (delegate_options.allow_dynamic_dimensions) {
2202 AppendDynamicDimensions(context, node->inputs, dynamic_dimensions);
2203 // When using custom ops, we cannot infer output shapes, so it is not part
2204 // of the execution request.
2205 if (delegate_options.vendor_plugin == nullptr) {
2206 AppendDynamicDimensions(context, node->outputs, dynamic_dimensions);
2207 }
2208 }
2209
2210 return NNAPIExecutionCache::Signature{std::move(tensor_handle_timestamps),
2211 std::move(dynamic_dimensions)};
2212 }
2213
2214 template <typename T>
2215 std::size_t HashVector(const std::vector<T>& vec) {
2216 std::size_t seed = vec.size();
2217 auto hasher = std::hash<T>{};
2218 for (const auto& i : vec) {
2219 seed = CombineHashes({seed, hasher(i)});
2220 }
2221 return seed;
2222 }
2223
2224 } // namespace
2225
2226 bool NNAPIExecutionCache::Signature::operator==(const Signature& other) const {
2227 return tensor_handle_timestamps == other.tensor_handle_timestamps &&
2228 dynamic_dimensions == other.dynamic_dimensions;
2229 }
2230
2231 std::size_t NNAPIExecutionCache::Signature::Hasher::operator()(
2232 const Signature& signature) const {
2233 return CombineHashes({HashVector(signature.tensor_handle_timestamps),
2234 HashVector(signature.dynamic_dimensions)});
2235 }
2236
2237 ANeuralNetworksExecution* NNAPIExecutionCache::Get(const Signature& signature) {
2238 auto it = lookup_.find(signature);
2239
2240 // Cache miss
2241 if (it == lookup_.end()) {
2242 return nullptr;
2243 }
2244
2245 // Cache hit, put the entry to the front
2246 auto& list_it = it->second.first;
2247 order_.erase(list_it);
2248 order_.push_front(signature);
2249 // Update the iterator in the lookup_ map
2250 list_it = order_.begin();
2251
2252 auto& execution = it->second.second;
2253 return execution.get();
2254 }
2255
2256 void NNAPIExecutionCache::Put(const Signature& signature,
2257 UniqueExecution execution) {
2258 // Release the least recently used cache if cache is full.
2259 if (order_.size() >= max_cache_size_) {
2260 ReleaseLRU();
2261 }
2262
2263 // Register the new cache.
2264 order_.push_front(signature);
2265 lookup_.emplace(signature,
2266 std::make_pair(order_.begin(), std::move(execution)));
2267 }
2268
2269 void NNAPIExecutionCache::Clear() {
2270 order_.clear();
2271 lookup_.clear();
2272 }
2273
2274 void NNAPIExecutionCache::SetMaxCacheSize(uint32_t max_cache_size) {
2275 max_cache_size_ = max_cache_size;
2276 while (order_.size() > max_cache_size_) {
2277 ReleaseLRU();
2278 }
2279 }
2280
2281 void NNAPIExecutionCache::ReleaseLRU() {
2282 lookup_.erase(order_.back());
2283 order_.pop_back();
2284 }
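// Illustrative usage sketch (not a specific call site): callers look up an
// execution by signature and register a freshly built one on a miss, e.g.
//   auto signature = CreateExecutionCacheSignature(context, node,
//                                                  delegate_options,
//                                                  tensor_memory_map);
//   ANeuralNetworksExecution* execution = cache.Get(signature);
//   if (execution == nullptr) {
//     // ... build `unique_execution` (a UniqueExecution) ...
//     cache.Put(signature, std::move(unique_execution));
//   }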
2285
2286 // Returns true if the node can be translated to NNAPI with the given
2287 // configuration; otherwise returns false and, when verbose validation is
2288 // enabled, appends the reasons for rejection to `map_failures`.
2289 bool NNAPIDelegateKernel::Validate(
2290 const TfLiteContext* context, const TfLiteRegistration* registration,
2291 int android_sdk_version, const TfLiteNode* node,
2292 bool is_accelerator_specified, NnapiDelegateVendorPlugin* vendor_plugin,
2293 std::vector<NNAPIValidationFailure>* map_failures) {
2294 OpValidationContext val_ctx{true, map_failures};
2295 if (vendor_plugin) {
2296 if (vendor_plugin->ValidateNode(context, registration, node)) {
2297 return true;
2298 }
2299 }
2300 auto builtin_code = registration->builtin_code;
2301 auto version = registration->version;
2302 switch (builtin_code) {
2303 case kTfLiteBuiltinAdd: {
2304 ExpectMaxOpVersion(version, 2, &val_ctx);
2305 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2306 ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
2307 if (IsInt32(context->tensors[node->inputs->data[0]].type)) {
2308 Expect(reinterpret_cast<TfLiteAddParams*>(node->builtin_data)
2309 ->activation == kTfLiteActNone,
2310 NNAPIValidationFailureType::kNoActivationExpected,
2311 "No activation function supported", &val_ctx);
2312 }
2313 } else {
2314 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2315 }
2316 } break;
2317 case kTfLiteBuiltinArgMax:
2318 case kTfLiteBuiltinArgMin: {
2319 ExpectMaxOpVersion(version, 2, &val_ctx);
2320 // Those operators were introduced in NNAPI 1.2.
2321 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2322 &val_ctx);
2323 const TfLiteType input_type =
2324 context->tensors[node->inputs->data[(0)]].type;
2325 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32,
2326 kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
2327
2328 const auto& axis_tensor = context->tensors[node->inputs->data[1]];
2329 if (axis_tensor.type == kTfLiteInt64) {
2330 Expect(
2331 axis_tensor.allocation_type == kTfLiteMmapRo &&
2332 *axis_tensor.data.i64 <= std::numeric_limits<int32_t>::max() &&
2333 *axis_tensor.data.i64 >= std::numeric_limits<int32_t>::min(),
2334 NNAPIValidationFailureType::kUnsupportedInputType,
2335 "NNAPI only supports axis as int32. If the axis type is int64 and "
2336 "constant we can convert it to int32 if the value isn't too "
2337 "large.",
2338 &val_ctx);
2339 } else {
2340 Expect(axis_tensor.type == kTfLiteInt32,
2341 NNAPIValidationFailureType::kUnsupportedInputType,
2342 "Axis should be Int32", &val_ctx);
2343 }
2344 if (builtin_code == kTfLiteBuiltinArgMax) {
2345 auto builtin =
2346 reinterpret_cast<TfLiteArgMaxParams*>(node->builtin_data);
2347 Expect(builtin->output_type == kTfLiteInt32,
2348 NNAPIValidationFailureType::kUnsupportedOutputType,
2349 "NNAPI only supports int32 output.", &val_ctx);
2350 } else {
2351 auto builtin =
2352 reinterpret_cast<TfLiteArgMinParams*>(node->builtin_data);
2353 Expect(builtin->output_type == kTfLiteInt32,
2354 NNAPIValidationFailureType::kUnsupportedOutputType,
2355 "NNAPI only supports int32 output.", &val_ctx);
2356 }
2357 } break;
2358 case kTfLiteBuiltinMul: {
2359 if (is_accelerator_specified) {
2360 ExpectMaxOpVersion(version, 3, &val_ctx);
2361 } else {
2362 ExpectMaxOpVersion(version, 2, &val_ctx);
2363 }
2364 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2365 ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
2366 if (IsInt32(context->tensors[node->inputs->data[0]].type)) {
2367 Expect(reinterpret_cast<TfLiteMulParams*>(node->builtin_data)
2368 ->activation == kTfLiteActNone,
2369 NNAPIValidationFailureType::kNoActivationExpected,
2370 "No activation function supported", &val_ctx);
2371 }
2372 } else {
2373 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2374 }
2375 } break;
2376 case kTfLiteBuiltinAveragePool2d: {
2377 ExpectMaxOpVersion(version, 2, &val_ctx);
2378 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2379 auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
2380 // TODO(b/138756912): Large filter window would overflow on the
2381 // quantized reference CPU path.
2382 if (IsQuantized(context->tensors[node->inputs->data[0]].type)) {
2383 Expect(is_accelerator_specified ||
2384 (builtin->filter_width * builtin->filter_height <= 256),
2385 NNAPIValidationFailureType::kUnsupportedOperandSize,
2386 "Large filter window would overflow on the reference CPU path",
2387 &val_ctx);
2388 }
2389 } break;
2390 case kTfLiteBuiltinMaxPool2d: {
2391 ExpectMaxOpVersion(version, 2, &val_ctx);
2392 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2393 } break;
2394 case kTfLiteBuiltinL2Pool2d: {
2395 ExpectOpVersion(version, 1, &val_ctx);
2396 ExpectIsFloatOperator(context, node, &val_ctx);
2397
2398 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2399 auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
2400 Expect(builtin->activation == kTfLiteActNone,
2401 NNAPIValidationFailureType::kUnsupportedOperandValue,
2402 "Before NNAPI 1.2 fused activation for l2_pool may not be "
2403 "supported.",
2404 &val_ctx);
2405 }
2406 } break;
2407 case kTfLiteBuiltinConv2d: {
2408 ExpectMaxOpVersion(version, 5, &val_ctx);
2409 const auto& input_tensor = context->tensors[node->inputs->data[0]];
2410 const auto& filter_tensor = context->tensors[node->inputs->data[1]];
2411 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2412 Expect(!IsHybridOperator(context, builtin_code, node),
2413 NNAPIValidationFailureType::kUnsupportedHybridOperator,
2414 "Hybrid operators not supported before NNAPI 1.2", &val_ctx);
2415 ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
2416
2417 if (filter_tensor.quantization.type == kTfLiteAffineQuantization) {
2418 TfLiteAffineQuantization* quantization_params =
2419 static_cast<TfLiteAffineQuantization*>(
2420 filter_tensor.quantization.params);
2421 Expect(quantization_params->scale->size <= 1,
2422 NNAPIValidationFailureType::kUnsupportedQuantizationType,
2423 "Per-channel quantized convolution not supported before NNAPI "
2424 "1.2.",
2425 &val_ctx);
2426 }
2427 }
2428 const auto input_type = input_tensor.type;
2429 if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
2430 input_type == kTfLiteUInt8) {
2431 ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
2432 }
2433 auto builtin = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
2434 // TODO(b/132950584): Add support for Conv2D with omitted bias.
2435 Expect(node->inputs->size == 3,
2436 NNAPIValidationFailureType::kMissingRequiredOperand,
2437 "Conv2D with omitted bias not supported", &val_ctx);
2438 if (builtin->dilation_width_factor != 1 ||
2439 builtin->dilation_height_factor != 1) {
2440 Expect(android_sdk_version >= kMinSdkVersionForNNAPI12,
2441 NNAPIValidationFailureType::kUnsupportedOperandValue,
2442 "NNAPI supports dilated Conv2D since NNAPI 1.2.", &val_ctx);
2443 }
2444 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2445 Expect(input_tensor.dims->data[3] == filter_tensor.dims->data[3],
2446 NNAPIValidationFailureType::kUnsupportedOperandValue,
2447                "Grouped convolution not supported before NNAPI 1.2",
2448 &val_ctx);
2449 }
2450 } break;
2451 case kTfLiteBuiltinDepthwiseConv2d: {
2452 ExpectMaxOpVersion(version, 3, &val_ctx);
2453
2454 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2455 ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
2456
2457 const auto input_type = context->tensors[node->inputs->data[0]].type;
2458 if (input_type == kTfLiteUInt8) {
2459 ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
2460 }
2461
2462 auto builtin =
2463 reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
2464 Expect(builtin->dilation_width_factor == 1 &&
2465 builtin->dilation_height_factor == 1,
2466 NNAPIValidationFailureType::kUnsupportedOperandValue,
2467 "dilation_width_factor and dilation_height_factor expected to "
2468 "be equal to 1",
2469 &val_ctx);
2470 }
2471 } break;
2472 case kTfLiteBuiltinFullyConnected: {
2473 ExpectMaxOpVersion(version, 5, &val_ctx);
2474 const auto output_type = context->tensors[node->outputs->data[0]].type;
2475 Expect(output_type != kTfLiteInt16,
2476 NNAPIValidationFailureType::kUnsupportedOutputType,
2477 "Unsupported output of type kTfLiteInt16", &val_ctx);
2478 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2479 Expect(!IsHybridOperator(context, builtin_code, node),
2480 NNAPIValidationFailureType::kUnsupportedHybridOperator,
2481 "Hybrid operators not supported before NNAPI 1.2", &val_ctx);
2482 ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
2483 }
2484 const auto input_type = context->tensors[node->inputs->data[0]].type;
2485 if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
2486 input_type == kTfLiteUInt8) {
2487 ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
2488 }
2489 auto builtin =
2490 reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data);
2491 if (builtin->keep_num_dims) {
2492 ExpectMinAndroidSdkVersion(android_sdk_version,
2493 kMinSdkVersionForNNAPI13, &val_ctx);
2494 }
2495 } break;
2496 case kTfLiteBuiltinHardSwish: {
2497       // Hardswish is supported; on pre-Q devices it is decomposed into basic
2498       // ops. Note that for some NNAPI accelerators the optimized TFLite
2499       // kernels may still be faster.
2500 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2501 } break;
2502 case kTfLiteBuiltinSoftmax: {
2503 ExpectOpVersion(version, 2, &val_ctx);
2504 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2505 const auto& output = context->tensors[node->outputs->data[0]];
2506 ExpectTypeIn(output.type, {kTfLiteFloat32, kTfLiteUInt8, kTfLiteInt8},
2507 NNAPIValidationFailureType::kUnsupportedOutputType,
2508 "Output type should be one of kTfLiteFloat32, kTfLiteUInt8, "
2509 "kTfLiteInt8.",
2510 &val_ctx);
2511 const auto& input = context->tensors[node->inputs->data[0]];
2512 const int input_rank = input.dims->size;
2513 Expect(input_rank <= 4,
2514 NNAPIValidationFailureType::kUnsupportedOperandRank,
2515 "Input rank should be <= 4", &val_ctx);
2516 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2517 Expect(
2518 input_rank == 2 || input_rank == 4,
2519 NNAPIValidationFailureType::kUnsupportedOperandRank,
2520 "Before API level 29 only 2D and 4D input tensors were supported.",
2521 &val_ctx);
2522 }
2523 } break;
2524 case kTfLiteBuiltinReshape: {
2525 ExpectOpVersion(version, 1, &val_ctx);
2526 if (android_sdk_version < kNNAPIRuntimeFeatureLevel6) {
2527 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2528 } else {
2529 ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
2530 }
2531 const auto& input = context->tensors[node->inputs->data[0]];
2532 Expect(input.dims->size <= 4,
2533 NNAPIValidationFailureType::kUnsupportedOperandRank,
2534 "Input rank should be <= 4", &val_ctx);
2535 const auto& output = context->tensors[node->outputs->data[0]];
2536 Expect(output.dims->size <= 4,
2537 NNAPIValidationFailureType::kUnsupportedOperandRank,
2538 "Output rank should be <= 4", &val_ctx);
2539 if (node->inputs->size >= 2) {
2540 Expect(context->tensors[node->inputs->data[1]].allocation_type ==
2541 kTfLiteMmapRo,
2542 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2543 "The shape input tensor must be constant.", &val_ctx);
2544 }
2545 if (node->inputs->size == 1) {
2546 // reject scalar reshaping
2547 auto* params =
2548 reinterpret_cast<TfLiteReshapeParams*>(node->builtin_data);
2549 int num_dimensions = params->num_dimensions;
2550 if (num_dimensions == 1 && params->shape[0] == 0) {
2551 // Legacy tflite models use a shape parameter of [0] to indicate
2552 // scalars.
2553 num_dimensions = 0;
2554 }
2555 Expect(num_dimensions > 0,
2556 NNAPIValidationFailureType::kUnsupportedOperandRank,
2557 "New shape rank should be > 0", &val_ctx);
2558 }
2559 } break;
2560 case kTfLiteBuiltinResizeBilinear: {
2561 ExpectMaxOpVersion(version, 3, &val_ctx);
2562 const auto& input = context->tensors[node->inputs->data[0]];
2563 const auto output_dims = context->tensors[node->outputs->data[0]].dims;
2564 Expect(input.dims->size == 4,
2565 NNAPIValidationFailureType::kUnsupportedOperandRank,
2566 "Input should have rank 4", &val_ctx);
2567 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2568 Expect(node->inputs->size >= 2,
2569 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2570 "Expected at least 2 inputs", &val_ctx);
2571 if (node->inputs->size >= 2) {
2572 Expect(context->tensors[node->inputs->data[1]].allocation_type ==
2573 kTfLiteMmapRo,
2574 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2575 "The size input tensor must be constant.", &val_ctx);
2576 }
2577 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2578 Expect(output_dims->data[1] == output_dims->data[2],
2579 NNAPIValidationFailureType::kUnsupportedOperandValue,
2580 "Require width == height due to driver differences in NNAPI "
2581 "< 1.2",
2582 &val_ctx);
2583 }
2584 auto builtin =
2585 reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
2586 if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
2587 Expect(!builtin->align_corners,
2588 NNAPIValidationFailureType::kUnsupportedOperandValue,
2589 "NNAPI does not support align_corners == true.", &val_ctx);
2590 Expect(!builtin->half_pixel_centers,
2591 NNAPIValidationFailureType::kUnsupportedOperandValue,
2592 "NNAPI does not support half_pixel_centers == true.", &val_ctx);
2593 }
2594 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2595 Expect(input.type == kTfLiteFloat32,
2596 NNAPIValidationFailureType::kUnsupportedInputType,
2597 "NNAPI 1.0 & 1.1 only supports float input.", &val_ctx);
2598 }
2599 } break;
2600 case kTfLiteBuiltinResizeNearestNeighbor: {
2601 ExpectMaxOpVersion(version, 3, &val_ctx);
2602 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2603 &val_ctx);
2604 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2605 Expect(node->inputs->size >= 2,
2606 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2607 "Expected at least 2 inputs", &val_ctx);
2608 if (node->inputs->size >= 2) {
2609 Expect(context->tensors[node->inputs->data[1]].allocation_type ==
2610 kTfLiteMmapRo,
2611 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2612 "The size input tensor must be constant.", &val_ctx);
2613 }
2614 auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
2615 node->builtin_data);
2616 if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
2617 Expect(!builtin->align_corners,
2618 NNAPIValidationFailureType::kUnsupportedOperandValue,
2619 "NNAPI does not support align_corners == true.", &val_ctx);
2620 Expect(!builtin->half_pixel_centers,
2621 NNAPIValidationFailureType::kUnsupportedOperandValue,
2622 "NNAPI does not support half_pixel_centers == true.", &val_ctx);
2623 }
2624 } break;
2625 case kTfLiteBuiltinSqueeze: {
2626 ExpectOpVersion(version, 1, &val_ctx);
2627 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2628 &val_ctx);
2629 auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data);
2630 if (android_sdk_version == kMinSdkVersionForNNAPI11) {
2631 Expect(builtin->num_squeeze_dims != 0,
2632 NNAPIValidationFailureType::kUnsupportedOperandValue,
2633 "NNAPI 1.1 does not support null squeeze_dims properly.",
2634 &val_ctx);
2635 }
2636 } break;
2637 case kTfLiteBuiltinUnidirectionalSequenceLstm: {
2638 ExpectMaxOpVersion(version, 2, &val_ctx);
2639 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2640 &val_ctx);
2641
2642 Expect(!IsHybridOperator(context, builtin_code, node),
2643 NNAPIValidationFailureType::kUnsupportedHybridOperator,
2644 "Hybrid version of this op is not supported by NN API.", &val_ctx);
2645
2646 Expect(node->inputs->size == 20 || node->inputs->size == 24,
2647 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2648 "Supporting only operation with 20 or 24 inputs", &val_ctx);
2649 } break;
2650 case kTfLiteBuiltinL2Normalization: {
2651 ExpectMaxOpVersion(version, 2, &val_ctx);
2652
2653 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2654 ExpectIsFloatOperator(context, node, &val_ctx);
2655
2656 const auto& input = context->tensors[node->inputs->data[0]];
2657 Expect(input.dims->size == 4,
2658 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2659                "Expected a rank-4 input", &val_ctx);
2660 }
2661 auto builtin = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
2662 Expect(builtin->activation == kTfLiteActNone,
2663 NNAPIValidationFailureType::kNoActivationExpected,
2664 "Expected no activation", &val_ctx);
2665 } break;
2666 case kTfLiteBuiltinLocalResponseNormalization: {
2667 ExpectOpVersion(version, 1, &val_ctx);
2668 } break;
2669 case kTfLiteBuiltinLshProjection: {
2670 ExpectOpVersion(version, 1, &val_ctx);
2671
2672 if (reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data)
2673 ->type == kTfLiteLshProjectionSparse) {
2674 // NNAPI does not support sparse projection correctly pre-Q
2675 // (b/111751836).
2676 Expect(android_sdk_version >= kMinSdkVersionForNNAPI12,
2677 NNAPIValidationFailureType::kUnsupportedInputType,
2678 "NNAPI does not support sparse projection correctly pre-Q",
2679 &val_ctx);
2680 Expect(node->inputs->size == 2,
2681 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2682                "NNAPI does not support weights for sparse projection.",
2683 &val_ctx);
2684 }
2685 } break;
2686 case kTfLiteBuiltinConcatenation: {
2687 ExpectMaxOpVersion(version, 2, &val_ctx);
2688 Expect(reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data)
2689 ->activation == kTfLiteActNone,
2690 NNAPIValidationFailureType::kNoActivationExpected,
2691 "No activation function supported", &val_ctx);
2692 Expect(context->tensors[node->inputs->data[0]].dims->size <= 4,
2693 NNAPIValidationFailureType::kUnsupportedOperandRank,
2694              "Input rank should be at most 4", &val_ctx);
2695
2696 const auto& input_type = context->tensors[node->inputs->data[0]].type;
2697 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32,
2698 kTfLiteUInt8, kTfLiteInt8);
2699
2700 if (input_type == kTfLiteUInt8 &&
2701 android_sdk_version < kMinSdkVersionForNNAPI12) {
2702 auto first_param = context->tensors[node->inputs->data[0]].params;
2703 for (int i = 1; i < node->inputs->size; i++) {
2704 auto curr_param = context->tensors[node->inputs->data[i]].params;
2705 if (!Expect(curr_param.scale == first_param.scale &&
2706 curr_param.zero_point == first_param.zero_point,
2707 NNAPIValidationFailureType::kUnsupportedOperandValue,
2708                     "NNAPI 1.0-1.1 only supports concatenating quantized "
2709                     "tensors of the same scale and offset.",
2710 &val_ctx)) {
2711 break;
2712 }
2713 }
2714 }
2715 } break;
2716 case kTfLiteBuiltinDequantize: {
2717 // Allow dequantizing fp16->fp32.
2718 if (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
2719 context->tensors[node->inputs->data[0]].type == kTfLiteFloat16 &&
2720 context->tensors[node->inputs->data[0]].allocation_type !=
2721 kTfLiteMmapRo) {
2722 return true;
2723 }
2724 Expect(version == 1 || version == 2,
2725 NNAPIValidationFailureType::kUnsupportedOperatorVersion,
2726 "Supported op versions are 1 and 2 only", &val_ctx);
2727
2728 const auto& input = context->tensors[node->inputs->data[0]];
2729 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2730 EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8);
2731 } else {
2732 EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8, kTfLiteInt8);
2733
2734 if (android_sdk_version == kMinSdkVersionForNNAPI12 &&
2735 input.type == kTfLiteInt8) {
2736 const auto zero_point = input.params.zero_point;
2737 Expect(zero_point == 0,
2738 NNAPIValidationFailureType::kUnsupportedInputType,
2739 "NN API supports int8 type since version 1.2 but only for "
2740 "symmetric quantization.",
2741 &val_ctx);
2742 }
2743 }
2744 } break;
2745 case kTfLiteBuiltinDensify: {
2746 // Allow densifying sparse weights.
2747 if (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
2748 context->tensors[node->inputs->data[0]].allocation_type ==
2749 kTfLiteMmapRo) {
2750 return true;
2751 }
2752 return false;
2753 } break;
2754 case kTfLiteBuiltinFloor: {
2755 ExpectOpVersion(version, 1, &val_ctx);
2756 } break;
2757 case kTfLiteBuiltinRelu:
2758 case kTfLiteBuiltinReluN1To1:
2759 case kTfLiteBuiltinRelu6:
2760 case kTfLiteBuiltinLogistic: {
2761 ExpectMaxOpVersion(version, 2, &val_ctx);
2762 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2763 } break;
2764 case kTfLiteBuiltinTanh: {
2765 ExpectMaxOpVersion(version, 2, &val_ctx);
2766 const TfLiteType input_type =
2767 context->tensors[node->inputs->data[0]].type;
2768 Expect(IsFloat(input_type) ||
2769 (IsQuantized(input_type) &&
2770 android_sdk_version >= kMinSdkVersionForNNAPI12),
2771 NNAPIValidationFailureType::kUnsupportedInputType,
2772              "NNAPI supports float tanh, and quantized tanh since 1.2.", &val_ctx);
2773 } break;
2774 case kTfLiteBuiltinSub: {
2775 ExpectMaxOpVersion(version, 3, &val_ctx);
2776 const TfLiteType input_type =
2777 context->tensors[node->inputs->data[0]].type;
2778 Expect((android_sdk_version >= kMinSdkVersionForNNAPI11 &&
2779 IsFloat(input_type)) ||
2780 (android_sdk_version >= kMinSdkVersionForNNAPI12 &&
2781 IsQuantized(input_type)) ||
2782 (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
2783 IsInt32(input_type)),
2784 NNAPIValidationFailureType::kUnsupportedInputType,
2785              "NNAPI sub requires float (1.1+), quantized (1.2+), or int32 (1.3+) input.", &val_ctx);
2786 if (IsInt32(input_type)) {
2787 Expect(reinterpret_cast<TfLiteSubParams*>(node->builtin_data)
2788 ->activation == kTfLiteActNone,
2789 NNAPIValidationFailureType::kNoActivationExpected,
2790 "No activation function supported", &val_ctx);
2791 }
2792 const int input0_rank =
2793 context->tensors[node->inputs->data[0]].dims->size;
2794 const int input1_rank =
2795 context->tensors[node->inputs->data[1]].dims->size;
2796 Expect(input0_rank <= 4 && input1_rank <= 4,
2797 NNAPIValidationFailureType::kUnsupportedOperandRank,
2798 "Input rank must be <= 4", &val_ctx);
2799 } break;
2800 case kTfLiteBuiltinDiv: {
2801 ExpectOpVersion(version, 1, &val_ctx);
2802 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2803 &val_ctx);
2804 Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2805 NNAPIValidationFailureType::kUnsupportedInputType,
2806              "NNAPI only supports float div.", &val_ctx);
2807 } break;
2808 case kTfLiteBuiltinPad:
2809 case kTfLiteBuiltinPadv2: {
2810 ExpectMaxOpVersion(version, 2, &val_ctx);
2811 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2812 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2813 &val_ctx);
2814
2815 const TfLiteIntArrayView input_shape(
2816 context->tensors[node->inputs->data[0]].dims);
2817 Expect(!HasZeroes(input_shape),
2818 NNAPIValidationFailureType::kUnsupportedOperandValue,
2819 "NN API pad ops do not support input tensors with no elements",
2820 &val_ctx);
2821
2822 Expect(node->inputs->size >= 2,
2823 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2824 "Expecting at least 2 inputs", &val_ctx);
2825
2826 if (node->inputs->size == 3) {
2827 // This is going to be mapped with a PadV2
2828 Expect(
2829 android_sdk_version >= kMinSdkVersionForNNAPI12,
2830 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2831 "Specification of the padding value is supported from NNAPI 1.2.",
2832 &val_ctx);
2833 } else { // this is going to be mapped as Pad
2834 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2835 Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2836 NNAPIValidationFailureType::kUnsupportedInputType,
2837 "Only Float32 inputs are supported before NNAPI 1.2",
2838 &val_ctx);
2839 }
2840 }
2841 } break;
2842 case kTfLiteBuiltinUnidirectionalSequenceRnn: {
2843 ExpectOpVersion(version, 1, &val_ctx);
2844 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2845 &val_ctx);
2846 Expect(!IsHybridOperator(context, builtin_code, node),
2847 NNAPIValidationFailureType::kUnsupportedHybridOperator,
2848 "Hybrid version of this op is not supported by NN API.", &val_ctx);
2849 } break;
2850 case kTfLiteBuiltinSpaceToBatchNd: {
2851 ExpectMaxOpVersion(version, 2, &val_ctx);
2852 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2853 &val_ctx);
2854 } break;
2855 case kTfLiteBuiltinBatchToSpaceNd: {
2856 ExpectMaxOpVersion(version, 2, &val_ctx);
2857 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2858 &val_ctx);
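      // The crops input is a 2x2 int32 matrix (four values, 16 bytes); only
      // all-zero crops can be delegated because NNAPI's BATCH_TO_SPACE_ND has
      // no crop operand.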
2859 auto crops = context->tensors[node->inputs->data[2]];
2860 auto crops_data = crops.data.i32;
2861 Expect(crops_data && crops.bytes == 16 && crops_data[0] == 0 &&
2862 crops_data[1] == 0 && crops_data[2] == 0 && crops_data[3] == 0,
2863 NNAPIValidationFailureType::kUnsupportedOperandValue,
2864 "All crops should be 0.", &val_ctx);
2865 } break;
2866 case kTfLiteBuiltinStridedSlice: {
2867 ExpectMaxOpVersion(version, 2, &val_ctx);
2868 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2869 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2870 &val_ctx);
2871 } break;
2872 case kTfLiteBuiltinTranspose: {
2873 ExpectMaxOpVersion(version, 2, &val_ctx);
2874 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2875 &val_ctx);
2876 // Note that the permutation input tensor value dictates the output
2877 // dimensions.
2878 // TODO(b/110888333): Support dynamically-sized tensors in delegates.
2879 Expect((node->inputs->size > 1) &&
2880 (context->tensors[node->inputs->data[1]].allocation_type ==
2881 kTfLiteMmapRo),
2882 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2883 "Dynamically-sized tensors not supported.", &val_ctx);
2884 } break;
2885 case kTfLiteBuiltinAbs:
2886 case kTfLiteBuiltinExp:
2887 case kTfLiteBuiltinLog:
2888 case kTfLiteBuiltinPow: {
2889 ExpectOpVersion(version, 1, &val_ctx);
2890 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2891 &val_ctx);
2892 ExpectIsFloatOperator(context, node, &val_ctx);
2893 } break;
2894 case kTfLiteBuiltinRsqrt: {
2895 ExpectOpVersion(version, 2, &val_ctx);
2896 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2897 &val_ctx);
2898 if (android_sdk_version < kNNAPIRuntimeFeatureLevel7) {
2899 ExpectIsFloatOperator(context, node, &val_ctx);
2900 } else {
2901 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2902 }
2903 } break;
2904 case kTfLiteBuiltinSlice: {
2905 ExpectMaxOpVersion(version, 2, &val_ctx);
2906 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2907 &val_ctx);
2908 const auto input_type = context->tensors[node->inputs->data[0]].type;
2909 const auto begin_type = context->tensors[node->inputs->data[1]].type;
2910 const auto size_type = context->tensors[node->inputs->data[2]].type;
2911 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2912 kTfLiteUInt8, kTfLiteInt8);
2913 Expect(begin_type == kTfLiteInt32,
2914 NNAPIValidationFailureType::kUnsupportedInputType,
2915 "Begin type should be Int32", &val_ctx);
2916 Expect(size_type == kTfLiteInt32,
2917 NNAPIValidationFailureType::kUnsupportedInputType,
2918 "Size type should be Int32", &val_ctx);
2919 } break;
2920 case kTfLiteBuiltinSin: {
2921 ExpectOpVersion(version, 1, &val_ctx);
2922 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2923 &val_ctx);
2924 ExpectIsFloatOperator(context, node, &val_ctx);
2925 } break;
2926 case kTfLiteBuiltinTransposeConv: {
2927 ExpectMaxOpVersion(version, 3, &val_ctx);
2928 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2929 &val_ctx);
2930 Expect((node->inputs->size > 1) &&
2931 (context->tensors[node->inputs->data[0]].allocation_type ==
2932 kTfLiteMmapRo) &&
2933 (context->tensors[node->inputs->data[1]].allocation_type ==
2934 kTfLiteMmapRo),
2935 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2936 "Dynamically-sized tensors not supported.", &val_ctx);
2937 } break;
2938 case kTfLiteBuiltinSqrt: {
2939 ExpectOpVersion(version, 1, &val_ctx);
2940 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2941 &val_ctx);
2942 ExpectIsFloatOperator(context, node, &val_ctx);
2943 } break;
2944 case kTfLiteBuiltinRnn: {
2945 ExpectOpVersion(version, 1, &val_ctx);
2946 Expect(node->inputs->size == 5,
2947 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2948              "Expected 5 inputs", &val_ctx);
2949 if (node->inputs->size >= 2) {
2950 Expect(
2951 context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
2952 kTfLiteFloat32,
2953 NNAPIValidationFailureType::kUnsupportedInputType,
2954             "NNAPI only supports float32 weights.", &val_ctx);
2955 }
2956 } break;
2957 case kTfLiteBuiltinSpaceToDepth: {
2958 ExpectMaxOpVersion(version, 2, &val_ctx);
2959 const TfLiteType input_type =
2960 context->tensors[node->inputs->data[0]].type;
2961 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2962 kTfLiteInt8);
2963 } break;
2964 case kTfLiteBuiltinSvdf: {
2965 ExpectOpVersion(version, 1, &val_ctx);
2966 Expect(node->inputs->size == 5,
2967              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2968              "Expected 5 inputs", &val_ctx);
2969 if (node->inputs->size >= 2) {
2970 Expect(
2971 context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
2972 kTfLiteFloat32,
2973 NNAPIValidationFailureType::kUnsupportedInputType,
2974             "NNAPI only supports float32 weights.", &val_ctx);
2975 }
2976 Expect(android_sdk_version >= kMinSdkVersionForNNAPI11,
2977 NNAPIValidationFailureType::kUnsupportedOperandRank,
2978 "SVDF does not support rank > 1 on NNAPI 1.0.", &val_ctx);
2979 Expect(context->tensors[node->inputs->data[/*kWeightsFeatureTensor*/ 1]]
2980 .type == kTfLiteFloat32,
2981 NNAPIValidationFailureType::kUnsupportedInputType,
2982 "Weights should be Float32", &val_ctx);
2983 } break;
2984 case kTfLiteBuiltinLstm: {
2985 ExpectMaxOpVersion(version, 3, &val_ctx);
2986 Expect(
2987 android_sdk_version >= kMinSdkVersionForNNAPI11,
2988 NNAPIValidationFailureType::kUnsupportedAndroidVersion,
2989 "NNAPI 1.0 has a bug for optional tensors which would affect LSTM.",
2990 &val_ctx);
2991 Expect(android_sdk_version >= kMinSdkVersionForNNAPI12 ||
2992 !IsHybridOperator(context, builtin_code, node),
2993 NNAPIValidationFailureType::kUnsupportedHybridOperator,
2994 "Hybrid operators not supported before NNAPI 1.2.", &val_ctx);
2995
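      // Basic (quantized) LSTM kernels keep their combined weights at input
      // index 2, while the full LSTM uses the input-to-output weights at
      // index 4; the weight type found there drives the checks below.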
2996 const auto weight_input_index =
2997 isLstmBasicKernel(node) ? 2 /* basic::kInputWeights */
2998 : 4 /* full::kInputToOutputWeightsTensor */;
2999
3000 const TfLiteType weight_type =
3001 context->tensors[node->inputs->data[weight_input_index]].type;
3002
3003 if (isLstmBasicKernel(node)) {
3004 Expect(weight_type == kTfLiteUInt8,
3005 NNAPIValidationFailureType::kUnsupportedInputType,
3006 "Basic LSTM Kernels support only UINT8 weights", &val_ctx);
3007
3008 const auto input_quantization_params =
3009 context->tensors[node->inputs->data[0]].params;
3010 Expect(input_quantization_params.scale == 1. / 128. &&
3011 input_quantization_params.zero_point == 128,
3012 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
3013 "Invalid input quantization", &val_ctx);
3014
3015 const auto output_quantization_params =
3016 context->tensors[node->outputs->data[0]].params;
3017 Expect(output_quantization_params.scale == 1. / 128. &&
3018 output_quantization_params.zero_point == 128,
3019 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
3020 "Invalid output quantization", &val_ctx);
3021
3022 const auto cell_state_quantization_params =
3023 context->tensors[node->outputs->data[1]].params;
3024 Expect(cell_state_quantization_params.scale == 16. / 32768. ||
3025 cell_state_quantization_params.zero_point == 0,
3026 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
3027 "Invalid cell state quantization", &val_ctx);
3028
3029 auto is_const_tensor = [&node, &context](int tensor_idx) {
3030 return context->tensors[node->inputs->data[tensor_idx]]
3031 .allocation_type == kTfLiteMmapRo;
3032 };
3033
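        // So that they can be decomposed into per-gate NNAPI operands at
        // mapping time, both tensors must be compile-time constants here.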
3034 Expect(is_const_tensor(2 /* kInputWeights */),
3035 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
3036 "Weights tensor should be constant", &val_ctx);
3037 Expect(is_const_tensor(3 /* kInputBiases */),
3038 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
3039 "Biases tensor should be constant", &val_ctx);
3040
3041 return val_ctx.is_valid;
3042 } else {
3043 if (node->inputs->size == 24) {
3044 ExpectMinAndroidSdkVersion(android_sdk_version,
3045 kMinSdkVersionForNNAPI12, &val_ctx);
3046 }
3047
3048 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
3049 Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8 ||
3050 weight_type == kTfLiteInt8,
3051 NNAPIValidationFailureType::kUnsupportedInputType,
3052 "Weight has to be Float32 or UINT8 or INT8", &val_ctx);
3053 } else {
3054 Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8,
3055 NNAPIValidationFailureType::kUnsupportedInputType,
3056 "Weight has to be Float32 or UINT8", &val_ctx);
3057 }
3058 }
3059 } break;
3060 case kTfLiteBuiltinMean: {
3061 ExpectMaxOpVersion(version, 2, &val_ctx);
3062 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
3063 &val_ctx);
3064 if (android_sdk_version >= kMinSdkVersionForNNAPI12) {
3065 Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 ||
3066 IsQuantized(context->tensors[node->inputs->data[0]].type),
3067 NNAPIValidationFailureType::kUnsupportedInputType,
3068 "Expected Float32 or Quantized input", &val_ctx);
3069 } else {
3070 Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
3071 NNAPIValidationFailureType::kUnsupportedInputType,
3072 "Expected Float32 input", &val_ctx);
3073 }
3074 Expect(context->tensors[node->outputs->data[0]].dims->size > 0,
3075 NNAPIValidationFailureType::kUnsupportedOutputType,
3076 "NNAPI does not support generating a scalar as output for MEAN.",
3077 &val_ctx);
3078 } break;
3079 case kTfLiteBuiltinEmbeddingLookup: {
3080 ExpectOpVersion(version, 1, &val_ctx);
3081 Expect(context->tensors[node->inputs->data[1]].type == kTfLiteFloat32,
3082 NNAPIValidationFailureType::kUnsupportedInputType,
3083             "NNAPI only supports float32 values.", &val_ctx);
3084 } break;
3085 case kTfLiteBuiltinHashtableLookup: {
3086 ExpectOpVersion(version, 1, &val_ctx);
3087 Expect(context->tensors[node->outputs->data[0]].type == kTfLiteFloat32,
3088 NNAPIValidationFailureType::kUnsupportedOutputType,
3089             "NNAPI only supports float32 output.", &val_ctx);
3090 } break;
3091 case kTfLiteBuiltinMaximum:
3092 case kTfLiteBuiltinMinimum: {
3093 ExpectMaxOpVersion(version, 3, &val_ctx);
3094 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3095 &val_ctx);
3096 const auto input_type = context->tensors[node->inputs->data[0]].type;
3097 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
3098 kTfLiteInt8, kTfLiteInt32);
3099 const TfLiteTensor& operand0 = context->tensors[node->inputs->data[0]];
3100 if (operand0.dims->size == 0) {
3101 Expect(operand0.allocation_type == kTfLiteMmapRo,
3102 NNAPIValidationFailureType::kUnsupportedInputType,
3103 "Scalar operand should be constant", &val_ctx);
3104 }
3105 const TfLiteTensor& operand1 = context->tensors[node->inputs->data[1]];
3106 if (operand1.dims->size == 0) {
3107 Expect(operand1.allocation_type == kTfLiteMmapRo,
3108 NNAPIValidationFailureType::kUnsupportedInputType,
3109 "Scalar operand should be constant", &val_ctx);
3110 }
3111 } break;
3112 case kTfLiteBuiltinCast: {
3113 ExpectOpVersion(version, 1, &val_ctx);
3114 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3115 &val_ctx);
3116 const TfLiteType input_type =
3117 context->tensors[node->inputs->data[0]].type;
3118 const TfLiteType output_type =
3119 context->tensors[node->outputs->data[0]].type;
3120 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
3121 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
3122 kTfLiteUInt8, kTfLiteInt8);
3123
3124 ExpectTypeIn(
3125 output_type,
3126 {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8},
3127 NNAPIValidationFailureType::kUnsupportedOutputType,
3128 "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
3129 "kTfLiteUInt8, kTfLiteInt8.",
3130 &val_ctx);
3131 } else {
3132 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
3133 kTfLiteUInt8);
3134
3135 ExpectTypeIn(
3136 output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
3137 NNAPIValidationFailureType::kUnsupportedOutputType,
3138 "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
3139 "kTfLiteUInt8.",
3140 &val_ctx);
3141 }
3142 } break;
3143 case kTfLiteBuiltinLeakyRelu:
3144 case kTfLiteBuiltinPrelu: {
3145 ExpectOpVersion(version, 1, &val_ctx);
3146 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3147 &val_ctx);
3148 const auto input_type = context->tensors[node->inputs->data[0]].type;
3149 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
3150 kTfLiteInt8);
3151 } break;
3152 case kTfLiteBuiltinTile: {
3153 ExpectOpVersion(version, 1, &val_ctx);
3154 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3155 &val_ctx);
3156 const auto input_type = context->tensors[node->inputs->data[0]].type;
3157 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt8,
3158 kTfLiteUInt8, kTfLiteInt32);
3159 const auto multipliers_type =
3160 context->tensors[node->inputs->data[1]].type;
3161 Expect(multipliers_type == kTfLiteInt32,
3162 NNAPIValidationFailureType::kUnsupportedInputType,
3163 "Multipliers should be Int32", &val_ctx);
3164 } break;
3165 case kTfLiteBuiltinLogicalOr:
3166 case kTfLiteBuiltinLogicalAnd:
3167 case kTfLiteBuiltinLogicalNot: {
3168 ExpectOpVersion(version, 1, &val_ctx);
3169 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3170 &val_ctx);
3171 const auto input_type = context->tensors[node->inputs->data[0]].type;
3172 Expect(input_type == kTfLiteBool,
3173 NNAPIValidationFailureType::kUnsupportedInputType,
3174 "Input should be bool", &val_ctx);
3175 } break;
3176 case kTfLiteBuiltinLess:
3177 case kTfLiteBuiltinLessEqual:
3178 case kTfLiteBuiltinGreater:
3179 case kTfLiteBuiltinGreaterEqual:
3180 case kTfLiteBuiltinEqual:
3181 case kTfLiteBuiltinNotEqual: {
3182 ExpectMaxOpVersion(version, 2, &val_ctx);
3183 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3184 &val_ctx);
3185 const auto input_type = context->tensors[node->inputs->data[0]].type;
3186 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
3187 kTfLiteInt8, kTfLiteBool, kTfLiteInt32);
3188 } break;
3189 case kTfLiteBuiltinNeg: {
3190 ExpectMaxOpVersion(version, 2, &val_ctx);
3191 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3192 &val_ctx);
3193 const auto input_type = context->tensors[node->inputs->data[0]].type;
3194 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32);
3195 } break;
3196 case kTfLiteBuiltinTopkV2: {
3197 ExpectMaxOpVersion(version, 2, &val_ctx);
3198 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3199 &val_ctx);
3200 const auto& input_type = context->tensors[node->inputs->data[0]].type;
3201 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
3202 kTfLiteUInt8, kTfLiteInt8);
3203 const auto& k_param = context->tensors[node->inputs->data[1]];
3204 Expect(k_param.type == kTfLiteInt32 &&
3205 k_param.allocation_type == kTfLiteMmapRo,
3206 NNAPIValidationFailureType::kUnsupportedInputType,
3207 "K param should be a constant of type Int32", &val_ctx);
3208 } break;
3209 case kTfLiteBuiltinSelect: {
3210 ExpectMaxOpVersion(version, 2, &val_ctx);
3211 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3212 &val_ctx);
3213 const auto value_type = context->tensors[node->inputs->data[1]].type;
3214 EXPECT_INPUT_TYPE_IN(value_type, kTfLiteFloat32, kTfLiteInt32,
3215 kTfLiteUInt8, kTfLiteInt8);
3216 TfLiteIntArray* condition_shape =
3217 context->tensors[node->inputs->data[0]].dims;
3218 TfLiteIntArray* input_shape =
3219 context->tensors[node->inputs->data[1]].dims;
3220 Expect(TfLiteIntArrayEqual(condition_shape, input_shape),
3221 NNAPIValidationFailureType::kUnsupportedOperandValue,
3222 "Condition and inputs tensors should have the same shape",
3223 &val_ctx);
3224 } break;
3225 case kTfLiteBuiltinGather: {
3226 ExpectOpVersion(version, 2, &val_ctx);
3227 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3228 &val_ctx);
3229 const auto input_type = context->tensors[node->inputs->data[0]].type;
3230 const auto& positions = context->tensors[node->inputs->data[1]];
3231
3232 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
3233 kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
3234
3235 Expect(positions.type == kTfLiteInt32,
3236 NNAPIValidationFailureType::kUnsupportedInputType,
3237             "Positions type should be kTfLiteInt32", &val_ctx);
3238 Expect(positions.dims->size != 0,
3239 NNAPIValidationFailureType::kUnsupportedOperandRank,
3240 "0-dimension args are not supported by NNAPI.", &val_ctx);
3241 } break;
3242 case kTfLiteBuiltinBidirectionalSequenceLstm: {
3243 ExpectOpVersion(version, 1, &val_ctx);
3244 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3245 &val_ctx);
3246 Expect(!IsHybridOperator(context, builtin_code, node),
3247 NNAPIValidationFailureType::kUnsupportedHybridOperator,
3248 "Hybrid version of this op is not supported by NN API.", &val_ctx);
3249 } break;
3250 case kTfLiteBuiltinExpandDims: {
3251 ExpectOpVersion(version, 1, &val_ctx);
3252 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3253 &val_ctx);
3254 const auto input_type = context->tensors[node->inputs->data[0]].type;
3255 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
3256 kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
3257 const auto axis = context->tensors[node->inputs->data[1]];
3258 Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
3259 NNAPIValidationFailureType::kUnsupportedInputType,
3260 "NNAPI only supports constant int32 axis tensor.", &val_ctx);
3261 } break;
3262 case kTfLiteBuiltinSplit: {
3263 ExpectOpVersion(version, 3, &val_ctx);
3264 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3265 &val_ctx);
3266 // Tensor indices: split_dim: 0, value: 1
3267 const TfLiteTensor& input = context->tensors[node->inputs->data[1]];
3268 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
3269 EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
3270 kTfLiteInt8, kTfLiteInt32);
3271 } else {
3272 EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
3273 kTfLiteInt32);
3274 }
3275 const TfLiteTensor& axis = context->tensors[node->inputs->data[0]];
3276 Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
3277 NNAPIValidationFailureType::kUnsupportedInputType,
3278 "NNAPI only supports constant int32 axis tensor.", &val_ctx);
3279 } break;
3280 case kTfLiteBuiltinSplitV: {
3281 ExpectOpVersion(version, 2, &val_ctx);
3282 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
3283 &val_ctx);
3284 // Tensor indices: value: 0, size_splits: 1, axis: 2
3285 const TfLiteTensor& input = context->tensors[node->inputs->data[0]];
3286 const TfLiteTensor& size_splits = context->tensors[node->inputs->data[1]];
3287 const TfLiteTensor& axis = context->tensors[node->inputs->data[2]];
3288 EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
3289 kTfLiteInt8, kTfLiteInt32);
3290 bool size_splits_is_int32_const_vector =
3291 size_splits.type == kTfLiteInt32 && size_splits.dims->size == 1 &&
3292 size_splits.allocation_type == kTfLiteMmapRo;
3293 bool axis_is_int32_const =
3294 axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo;
3295 Expect(size_splits_is_int32_const_vector,
3296 NNAPIValidationFailureType::kUnsupportedInputType,
3297 "NNAPI only supports constant int32 size_splits vector.",
3298 &val_ctx);
3299 Expect(axis_is_int32_const,
3300 NNAPIValidationFailureType::kUnsupportedInputType,
3301 "NNAPI only supports constant int32 axis tensor.", &val_ctx);
3302 if (size_splits_is_int32_const_vector && axis_is_int32_const) {
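        // With both operands constant, the split sizes can be inspected:
        // every explicit size must be non-zero, and a -1 entry (size inferred
        // from the remaining elements) must also resolve to a non-zero size.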
3303 Expect(std::all_of(size_splits.data.i32,
3304 size_splits.data.i32 + size_splits.dims->data[0],
3305 [](auto size) { return size != 0; }),
3306 NNAPIValidationFailureType::kUnsupportedInputType,
3307 "NNAPI only supports non-zero split sizes.", &val_ctx);
3308 Expect(ComputeSplitVUnknownSplitSize(context, node) != 0,
3309 NNAPIValidationFailureType::kUnsupportedInputType,
3310 "NNAPI only supports non-zero split sizes.", &val_ctx);
3311 }
3312 } break;
3313 case kTfLiteBuiltinLogSoftmax: {
3314 ExpectOpVersion(version, 1, &val_ctx);
3315 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3316 &val_ctx);
3317 const auto input_type = context->tensors[node->inputs->data[0]].type;
3318 Expect(input_type == kTfLiteFloat32,
3319 NNAPIValidationFailureType::kUnsupportedInputType,
3320 "Input should be Float32.", &val_ctx);
3321 } break;
3322 case kTfLiteBuiltinQuantize: {
3323 ExpectMaxOpVersion(version, 2, &val_ctx);
3324 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3325 &val_ctx);
3326 const auto value_type = context->tensors[node->inputs->data[0]].type;
3327 Expect(value_type == kTfLiteFloat32 || IsQuantized(value_type),
3328 NNAPIValidationFailureType::kUnsupportedInputType,
3329 "Value should be quantized or Float32.", &val_ctx);
3330 if (IsQuantized(value_type)) {
3331 const auto quantization_params =
3332 context->tensors[node->inputs->data[0]].params;
3333 Expect(quantization_params.scale > 0.f,
3334 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
3335 "Quantization scale should be > 0.", &val_ctx);
3336 }
3337 const auto output_type = context->tensors[node->outputs->data[0]].type;
3338 if (android_sdk_version < kMinSdkVersionForNNAPI13) {
3339 Expect(output_type == kTfLiteUInt8,
3340 NNAPIValidationFailureType::kUnsupportedOutputType,
3341 "Output should be kTfLiteUInt8.", &val_ctx);
3342 } else {
3343 ExpectTypeIn(output_type, {kTfLiteUInt8, kTfLiteInt8},
3344 NNAPIValidationFailureType::kUnsupportedOutputType,
3345                      "Output should be kTfLiteUInt8 or kTfLiteInt8.", &val_ctx);
3346 }
3347 const auto quantization_params =
3348 context->tensors[node->outputs->data[0]].params;
3349 Expect(quantization_params.scale > 0.f,
3350 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
3351 "Quantization scale should be > 0.", &val_ctx);
3352 } break;
3353 case kTfLiteBuiltinReduceAny: {
3354 ExpectOpVersion(version, 2, &val_ctx);
3355 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3356 &val_ctx);
3357 Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
3358 NNAPIValidationFailureType::kUnsupportedOutputType,
3359 "NNAPI does not support generating a scalar as output.", &val_ctx);
3360 } break;
3361 case kTfLiteBuiltinReduceMin:
3362 case kTfLiteBuiltinReduceMax: {
3363 ExpectMaxOpVersion(version, 2, &val_ctx);
3364 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3365 &val_ctx);
3366 const auto input_tensor = context->tensors[node->inputs->data[0]];
3367 const auto input_type = input_tensor.type;
3368 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
3369 kTfLiteInt8);
3370 Expect(input_tensor.dims->size != 0,
3371 NNAPIValidationFailureType::kUnsupportedOutputType,
3372 "NNAPI does not support generating a scalar as output.", &val_ctx);
3373 } break;
3374 case kTfLiteBuiltinDepthToSpace: {
3375 const TfLiteType input_type =
3376 context->tensors[node->inputs->data[0]].type;
3377 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
3378 kTfLiteInt8);
3379 } break;
3380 case kTfLiteBuiltinReduceProd:
3381 case kTfLiteBuiltinSum: {
3382 ExpectOpVersion(version, 1, &val_ctx);
3383 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
3384 &val_ctx);
3385 Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
3386 NNAPIValidationFailureType::kUnsupportedOutputType,
3387 "NNAPI does not support generating a scalar as output", &val_ctx);
3388 const auto input_type = context->tensors[node->inputs->data[0]].type;
3389 Expect(input_type == kTfLiteFloat32,
3390 NNAPIValidationFailureType::kUnsupportedInputType,
3391 "NNAPI only supports floating point input.", &val_ctx);
3392 } break;
3393 case kTfLiteBuiltinElu: {
3394 ExpectOpVersion(version, 1, &val_ctx);
3395 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
3396 &val_ctx);
3397 const auto input_type = context->tensors[node->inputs->data[0]].type;
3398 Expect(input_type == kTfLiteFloat32,
3399 NNAPIValidationFailureType::kUnsupportedInputType,
3400 "NNAPI only supports floating point input.", &val_ctx);
3401 } break;
3402 case kTfLiteBuiltinFill: {
3403 ExpectOpVersion(version, 1, &val_ctx);
3404 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
3405 &val_ctx);
3406 const auto& dims_tensor = context->tensors[node->inputs->data[0]];
3407 Expect(IsConstantTensor(&dims_tensor),
3408 NNAPIValidationFailureType::kUnsupportedInputType,
3409 "NNAPI doesn't support dynamic dimensions tensor.", &val_ctx);
3410 EXPECT_INPUT_TYPE_IN(dims_tensor.type, kTfLiteInt32, kTfLiteInt64);
3411 if (IsConstantTensor(&dims_tensor)) {
3412 Expect(dims_tensor.dims->data[0] != 0,
3413 NNAPIValidationFailureType::kUnsupportedOperandValue,
3414 "NNAPI doesn't support generating scalars from FILL", &val_ctx);
3415 if (dims_tensor.type == kTfLiteInt64) {
3416 bool fit_in_int32 =
3417 std::all_of(dims_tensor.data.i64,
3418 dims_tensor.data.i64 + dims_tensor.dims->data[0],
3419 [](int64_t dim) {
3420 return std::numeric_limits<int32_t>::min() <= dim &&
3421 dim <= std::numeric_limits<int32_t>::max();
3422 });
3423 Expect(fit_in_int32,
3424 NNAPIValidationFailureType::kUnsupportedOperandValue,
3425                "NNAPI only supports an int32 dimensions tensor. Constant "
3426                "int64 dimensions can be converted to int32 as long as "
3427                "every value fits in int32.",
3428 &val_ctx);
3429 }
3430 }
3431 const auto& value_tensor = context->tensors[node->inputs->data[1]];
3432 EXPECT_INPUT_TYPE_IN(value_tensor.type, kTfLiteFloat32, kTfLiteInt32,
3433 kTfLiteInt64);
3434 if (value_tensor.type == kTfLiteInt64 &&
3435 IsConstantTensor(&value_tensor)) {
3436 Expect(
3437 *value_tensor.data.i64 <= std::numeric_limits<int32_t>::max() &&
3438 *value_tensor.data.i64 >= std::numeric_limits<int32_t>::min(),
3439 NNAPIValidationFailureType::kUnsupportedInputType,
3440            "NNAPI only supports int32 input. A constant int64 value can "
3441            "be converted to int32 as long as it fits within the int32 "
3442            "range.",
3443 &val_ctx);
3444 }
3445 } break;
3446 case kTfLiteBuiltinPack: {
3447 ExpectOpVersion(version, 2, &val_ctx);
3448 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
3449 &val_ctx);
3450 const auto input_type = context->tensors[node->inputs->data[0]].type;
3451 if (android_sdk_version >= kNNAPIRuntimeFeatureLevel6) {
3452 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteInt32, kTfLiteFloat32,
3453 kTfLiteInt8, kTfLiteUInt8);
3454 } else {
3455 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt8);
3456 auto builtin = reinterpret_cast<TfLitePackParams*>(node->builtin_data);
3457 Expect(builtin->axis != -1 &&
3458 builtin->axis !=
3459 context->tensors[node->inputs->data[0]].dims->size,
3460 NNAPIValidationFailureType::kUnsupportedOperandValue,
3461 "NNAPI does not support axis being the last dimension",
3462 &val_ctx);
3463 }
3464 } break;
3465 case kTfLiteBuiltinUnpack: {
3466 ExpectOpVersion(version, 2, &val_ctx);
3467 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
3468 &val_ctx);
3469 const auto input_type = context->tensors[node->inputs->data[0]].type;
3470 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
3471 kTfLiteInt8);
3472 Expect(context->tensors[node->inputs->data[0]].dims->size > 1,
3473 NNAPIValidationFailureType::kUnsupportedOperandValue,
3474 "NNAPI does not support unpacking a rank-1 tensor", &val_ctx);
3475 Expect(context->tensors[node->inputs->data[0]].dims->size <= 4,
3476 NNAPIValidationFailureType::kUnsupportedOperandValue,
3477 "NNAPI does not support unpacking a tensor with rank > 4",
3478 &val_ctx);
3479 const auto* builtin =
3480 reinterpret_cast<const TfLiteUnpackParams*>(node->builtin_data);
3481 Expect(builtin->axis != -1 &&
3482 builtin->axis !=
3483 context->tensors[node->inputs->data[0]].dims->size - 1,
3484 NNAPIValidationFailureType::kUnsupportedOperandValue,
3485 "NNAPI does not support axis being the last dimension", &val_ctx);
3486 } break;
3487 case kTfLiteBuiltinSquaredDifference: {
3488 ExpectOpVersion(version, 2, &val_ctx);
3489 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
3490 &val_ctx);
3491 const auto input0_type = context->tensors[node->inputs->data[0]].type;
3492 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
3493 EXPECT_INPUT_TYPE_IN(input0_type, kTfLiteFloat32, kTfLiteUInt8,
3494 kTfLiteInt8, kTfLiteInt32);
3495 } else if (android_sdk_version >= kMinSdkVersionForNNAPI12) {
3496 EXPECT_INPUT_TYPE_IN(input0_type, kTfLiteFloat32, kTfLiteUInt8);
3497 } else {
3498 EXPECT_INPUT_TYPE_IN(input0_type, kTfLiteFloat32);
3499 }
3500 const int input0_rank =
3501 context->tensors[node->inputs->data[0]].dims->size;
3502 const int input1_rank =
3503 context->tensors[node->inputs->data[1]].dims->size;
3504 Expect(input0_rank <= 4 && input1_rank <= 4,
3505 NNAPIValidationFailureType::kUnsupportedOperandRank,
3506 "NNAPI does not support input rank greater than 4", &val_ctx);
3507 } break;
3508 case kTfLiteBuiltinBatchMatmul: {
3509 ExpectOpVersion(version, 2, &val_ctx);
3510 ExpectMinAndroidSdkVersion(android_sdk_version,
3511 kNNAPIRuntimeFeatureLevel6, &val_ctx);
3512 const auto& input0 = context->tensors[node->inputs->data[0]];
3513 const auto& input1 = context->tensors[node->inputs->data[1]];
3514 EXPECT_INPUT_TYPE_IN(input0.type, kTfLiteFloat32, kTfLiteInt32,
3515 kTfLiteInt8);
3516 Expect(input0.type == input1.type,
3517 NNAPIValidationFailureType::kUnsupportedHybridOperator,
3518 "NNAPI does not support hybrid batch matmul", &val_ctx);
3519 Expect(input0.dims->size <= 4 && input0.dims->size >= 2,
3520 NNAPIValidationFailureType::kUnsupportedOperandRank,
3521 "NNAPI does not support input rank greater than 4 or less than 2",
3522 &val_ctx);
3523 Expect(!IsBroadcastBatchMatMul(context, node),
3524 NNAPIValidationFailureType::kUnsupportedInputType,
3525 "NNAPI does not support broadcast batch matmul", &val_ctx);
3526 } break;
3527 case kTfLiteBuiltinMirrorPad: {
3528 ExpectMaxOpVersion(version, 2, &val_ctx);
3529 ExpectMinAndroidSdkVersion(android_sdk_version,
3530 kNNAPIRuntimeFeatureLevel7, &val_ctx);
3531 ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
3532
3533 const TfLiteIntArrayView input_shape(
3534 context->tensors[node->inputs->data[0]].dims);
3535 Expect(!HasZeroes(input_shape),
3536 NNAPIValidationFailureType::kUnsupportedOperandValue,
3537 "NN API pad ops do not support input tensors with no elements",
3538 &val_ctx);
3539 Expect(node->inputs->size == 2,
3540 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
3541 "Expecting 2 inputs", &val_ctx);
3542 } break;
3543 case kTfLiteBuiltinReverseV2: {
3544 ExpectMaxOpVersion(version, 3, &val_ctx);
3545 ExpectMinAndroidSdkVersion(android_sdk_version,
3546 kNNAPIRuntimeFeatureLevel7, &val_ctx);
3547 ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
3548 Expect(node->inputs->size == 2,
3549 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
3550 "Expecting 2 inputs", &val_ctx);
3551 } break;
3552 default:
3553 // All other operators are not mapped.
3554 AddValidationFailure(NNAPIValidationFailureType::kUnsupportedOperator,
3555 "Unsupported operation type.", &val_ctx);
3556 }
3557 return val_ctx.is_valid;
3558 } // NOLINT(readability/fn_size)
3559
3560 TfLiteStatus NNAPIDelegateKernel::Map(
3561 TfLiteContext* context, int builtin_code, int version,
3562 int android_sdk_version, const NNAPIOpMappingArgs& mapping_args,
3563 ANeuralNetworksOperationType* nn_op_type,
3564 NnapiDelegateVendorPlugin* vendor_plugin) {
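  // Helper shared by the fully-connected and transpose-conv mappings below:
  // when the TFLite node carries no bias, NNAPI still expects one, so a
  // zero-filled bias operand of the matching type and shape is synthesized.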
3565 auto add_zero_bias = [mapping_args](int input_id, int filter_id,
3566 int num_elements) -> void {
3567 // NNAPI requires a bias tensor, so we allocate a new tensor to fill
3568     // it with zeroes. It is deleted with the other tensors in the context
3569     // during the subgraph destructor call.
3570 int bias_index = -1;
3571 mapping_args.context->AddTensors(mapping_args.context, 1, &bias_index);
3572 TfLiteTensor* bias_tensor = &mapping_args.context->tensors[bias_index];
3573 const auto input_type = mapping_args.context->tensors[input_id].type;
3574 if (input_type == kTfLiteFloat32) {
3575 bias_tensor->type = kTfLiteFloat32;
3576 } else {
3577 bias_tensor->type = kTfLiteInt32;
3578 }
3579 // Create an array with a required bias shape and resize the bias
3580 // tensor.
3581 TfLiteIntArray* bias_shape = TfLiteIntArrayCreate(1);
3582 bias_shape->data[0] = num_elements;
3583 bias_tensor->allocation_type = kTfLiteDynamic;
3584 mapping_args.context->ResizeTensor(mapping_args.context, bias_tensor,
3585 bias_shape);
3586 // Set tensor's values to zeroes and add it using AddVector*, so
3587 // that the values are copied to NNAPI. We don't use the AddTensor
3588 // function because it doesn't copy values and the tensor we just
3589 // created is not in the node->inputs.
3590 if (input_type == kTfLiteFloat32) {
3591 memset(bias_tensor->data.f, 0, num_elements * sizeof(float));
3592 mapping_args.builder->AddVectorFloat32Operand(bias_tensor->data.f,
3593 num_elements);
3594 } else {
3595 memset(bias_tensor->data.i32, 0, num_elements * sizeof(int));
3596 const TfLiteTensor& input_tensor =
3597 mapping_args.context->tensors[input_id];
3598 const TfLiteTensor& filter_tensor =
3599 mapping_args.context->tensors[filter_id];
3600       // NNAPI requires the bias scale to be the product of the input
3601       // scale and the filter scale.
3602 bias_tensor->params.scale =
3603 input_tensor.params.scale * filter_tensor.params.scale;
3604 mapping_args.builder->AddVectorInt32Operand(
3605 bias_tensor->data.i32, num_elements, bias_tensor->params.scale,
3606 /*zero_point=*/0);
3607 }
3608 };
3609 switch (builtin_code) {
3610 case kTfLiteBuiltinAdd: {
3611 auto builtin =
3612 reinterpret_cast<TfLiteAddParams*>(mapping_args.node->builtin_data);
3613 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3614 *nn_op_type = ANEURALNETWORKS_ADD;
3615 } break;
3616 case kTfLiteBuiltinArgMax: {
3617 *nn_op_type = ANEURALNETWORKS_ARGMAX;
3618 } break;
3619 case kTfLiteBuiltinArgMin: {
3620 *nn_op_type = ANEURALNETWORKS_ARGMIN;
3621 } break;
3622 case kTfLiteBuiltinMul: {
3623 auto builtin =
3624 reinterpret_cast<TfLiteMulParams*>(mapping_args.node->builtin_data);
3625 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3626 *nn_op_type = ANEURALNETWORKS_MUL;
3627 } break;
3628 case kTfLiteBuiltinAveragePool2d: {
3629 mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
3630 *nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D;
3631 } break;
3632 case kTfLiteBuiltinMaxPool2d: {
3633 mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
3634 *nn_op_type = ANEURALNETWORKS_MAX_POOL_2D;
3635 } break;
3636 case kTfLiteBuiltinL2Pool2d: {
3637 mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
3638 *nn_op_type = ANEURALNETWORKS_L2_POOL_2D;
3639 } break;
3640 case kTfLiteBuiltinConv2d: {
3641 auto builtin =
3642 reinterpret_cast<TfLiteConvParams*>(mapping_args.node->builtin_data);
3643 mapping_args.builder->AddScalarInt32Operand(builtin->padding);
3644 mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
3645 mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
3646 const int input_id = mapping_args.node->inputs->data[/*kInputTensor*/ 0];
3647 const int filter_id =
3648 mapping_args.node->inputs->data[/*kWeightsTensor*/ 1];
3649 const auto& input_tensor = context->tensors[input_id];
3650 const auto& filter_tensor = context->tensors[filter_id];
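      // A convolution is treated as grouped when the filter's channel
      // dimension differs from the input's; GROUPED_CONV_2D additionally
      // takes the group count (input channels / filter channels) as an
      // operand.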
3651 auto is_grouped_conv = false;
3652       // Only check grouped convolution if the input and filter shapes are propagated.
3653 if (input_tensor.dims->size != 0 && filter_tensor.dims->size != 0) {
3654 is_grouped_conv =
3655 input_tensor.dims->data[3] != filter_tensor.dims->data[3];
3656 }
3657 if (is_grouped_conv) {
3658 mapping_args.builder->AddScalarInt32Operand(
3659 input_tensor.dims->data[3] / filter_tensor.dims->data[3]);
3660 }
3661 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3662 // NNAPI supports dilated Conv2D since NNAPI 1.2.
3663 if (builtin->dilation_width_factor != 1 ||
3664 builtin->dilation_height_factor != 1) {
3665 mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format
3666 mapping_args.builder->AddScalarInt32Operand(
3667 builtin->dilation_width_factor);
3668 mapping_args.builder->AddScalarInt32Operand(
3669 builtin->dilation_height_factor);
3670 }
3671 if (is_grouped_conv) {
3672 *nn_op_type = ANEURALNETWORKS_GROUPED_CONV_2D;
3673 } else {
3674 *nn_op_type = ANEURALNETWORKS_CONV_2D;
3675 }
3676 } break;
3677 case kTfLiteBuiltinDepthwiseConv2d: {
3678 auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(
3679 mapping_args.node->builtin_data);
3680 mapping_args.builder->AddScalarInt32Operand(builtin->padding);
3681 mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
3682 mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
3683 mapping_args.builder->AddScalarInt32Operand(builtin->depth_multiplier);
3684 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3685 if (builtin->dilation_width_factor != 1 ||
3686 builtin->dilation_height_factor != 1) {
3687 mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format.
3688 mapping_args.builder->AddScalarInt32Operand(
3689 builtin->dilation_width_factor);
3690 mapping_args.builder->AddScalarInt32Operand(
3691 builtin->dilation_height_factor);
3692 }
3693 *nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D;
3694 } break;
3695 case kTfLiteBuiltinFullyConnected: {
3696 const bool is_bias_present =
3697 mapping_args.node->inputs->size == 3 &&
3698 mapping_args.node->inputs->data[2] != kTfLiteOptionalTensor;
3699 if (!is_bias_present) {
3700 const int input_tensor_id =
3701 mapping_args.node->inputs->data[/*kInputTensor*/ 0];
3702 const int filter_tensor_id =
3703 mapping_args.node->inputs->data[/*kWeightsTensor*/ 1];
3704 const int num_units =
3705 mapping_args.context->tensors[filter_tensor_id].dims->data[0];
3706 add_zero_bias(input_tensor_id, filter_tensor_id, num_units);
3707 }
3708 auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(
3709 mapping_args.node->builtin_data);
3710 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3711 *nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED;
3712 } break;
3713 case kTfLiteBuiltinHardSwish: {
3714 *nn_op_type = ANEURALNETWORKS_HARD_SWISH;
3715 } break;
3716 case kTfLiteBuiltinSoftmax: {
3717 auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>(
3718 mapping_args.node->builtin_data);
3719 mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
3720       // The optional scalar specifying the axis the softmax is performed
3721       // over is not added; it defaults to -1 (the last dimension).
3722 *nn_op_type = ANEURALNETWORKS_SOFTMAX;
3723 } break;
3724 case kTfLiteBuiltinReshape: {
3725 if (mapping_args.node->inputs->size == 1) {
3726 // if no new_shape tensor, construct the new shape from params.
3727 auto* params = reinterpret_cast<TfLiteReshapeParams*>(
3728 mapping_args.node->builtin_data);
3729 int num_dimensions = params->num_dimensions;
3730 std::vector<int32_t> output_shape(num_dimensions);
3731 for (int i = 0; i < num_dimensions; ++i) {
3732 output_shape[i] = params->shape[i];
3733 }
3734 mapping_args.builder->AddVectorInt32Operand(
3735 output_shape.data(), static_cast<uint32_t>(num_dimensions));
3736 }
3737 *nn_op_type = ANEURALNETWORKS_RESHAPE;
3738 } break;
3739 case kTfLiteBuiltinResizeBilinear: {
3740 const int output_id = mapping_args.node->outputs->data[0];
3741 auto& output = mapping_args.context->tensors[output_id];
3742 const int output_height = output.dims->data[1];
3743 const int output_width = output.dims->data[2];
3744 mapping_args.builder->AddScalarInt32Operand(output_width);
3745 mapping_args.builder->AddScalarInt32Operand(output_height);
3746 auto builtin = reinterpret_cast<TfLiteResizeBilinearParams*>(
3747 mapping_args.node->builtin_data);
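      // The layout flag and the alignment flags are optional NNAPI operands
      // defaulting to false, so they are only appended when a non-default
      // value must be passed.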
3748 if (builtin->align_corners == true ||
3749 builtin->half_pixel_centers == true) {
3750 mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format
3751 mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
3752 mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
3753 }
3754 *nn_op_type = ANEURALNETWORKS_RESIZE_BILINEAR;
3755 } break;
3756 case kTfLiteBuiltinResizeNearestNeighbor: {
3757 const TfLiteTensor& new_shape =
3758 mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
3759 // NNAPI uses scalar inputs for height and width.
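      // new_shape holds {height, width}, while NNAPI expects width first and
      // then height, hence the reversed indexing.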
3760 mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[1]);
3761 mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[0]);
3762 mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format
3763 auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
3764 mapping_args.node->builtin_data);
3765 if (builtin->align_corners == true ||
3766 builtin->half_pixel_centers == true) {
3767 mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
3768 mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
3769 }
3770 *nn_op_type = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR;
3771 } break;
3772 case kTfLiteBuiltinSqueeze: {
3773 auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(
3774 mapping_args.node->builtin_data);
3775 // Note that we add the squeeze dimensions even if the dimensions
3776 // were unspecified (empty), as NNAPI requires the operand.
3777 mapping_args.builder->AddVectorInt32Operand(
3778 builtin->num_squeeze_dims ? builtin->squeeze_dims : nullptr,
3779 static_cast<uint32_t>(builtin->num_squeeze_dims));
3780 *nn_op_type = ANEURALNETWORKS_SQUEEZE;
3781 } break;
3782 case kTfLiteBuiltinUnidirectionalSequenceLstm: {
3783 auto builtin = reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>(
3784 mapping_args.node->builtin_data);
3785 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3786 mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
3787 mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
3788 mapping_args.builder->AddScalarBoolOperand(builtin->time_major);
3789 const bool hybrid_op = IsHybridOperator(
3790 mapping_args.context, kTfLiteBuiltinUnidirectionalSequenceLstm,
3791 mapping_args.node);
3792 if (mapping_args.node->inputs->size == 24) {
3793 // Add layer normalization tensors if they are provided.
3794 for (int i = 20; i < 24; ++i) {
3795 const int input_index = mapping_args.node->inputs->data[i];
3796 if (input_index != kTfLiteOptionalTensor) {
3797 mapping_args.builder->AddTensorInput(input_index, hybrid_op);
3798 } else {
3799 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3800 }
3801 }
3802 } else {
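        // The 20-input variant carries no layer normalization tensors; add
        // four empty float32 operands as placeholders for them.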
3803 for (int i = 0; i < 4; ++i) {
3804 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3805 }
3806 }
3807
3808 *nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM;
3809 } break;
3810 case kTfLiteBuiltinL2Normalization: {
3811 *nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION;
3812 } break;
3813 case kTfLiteBuiltinLocalResponseNormalization: {
3814 auto builtin = reinterpret_cast<TfLiteLocalResponseNormParams*>(
3815 mapping_args.node->builtin_data);
3816 mapping_args.builder->AddScalarInt32Operand(builtin->radius);
3817 mapping_args.builder->AddScalarFloat32Operand(builtin->bias);
3818 mapping_args.builder->AddScalarFloat32Operand(builtin->alpha);
3819 mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
3820 *nn_op_type = ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION;
3821 } break;
3822 case kTfLiteBuiltinLshProjection: {
3823 auto builtin = reinterpret_cast<TfLiteLSHProjectionParams*>(
3824 mapping_args.node->builtin_data);
3825 int type = builtin->type;
3826 // In Android Q+, NNAPI uses 3 to denote
3827 // kTfLiteLshProjectionSparse.
3828 const int kNNAPILshProjectionSparse = 3;
3829 if (builtin->type == kTfLiteLshProjectionSparse) {
3830 type = kNNAPILshProjectionSparse;
3831 // Add NNAPI null weight operand.
3832 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3833 }
3834 mapping_args.builder->AddScalarInt32Operand(type);
3835 *nn_op_type = ANEURALNETWORKS_LSH_PROJECTION;
3836 } break;
3837 case kTfLiteBuiltinConcatenation: {
3838 auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(
3839 mapping_args.node->builtin_data);
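      // TFLite allows a negative concatenation axis; NNAPI expects a
      // non-negative one, so offset negative values by the input rank.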
3840 int axis = builtin->axis < 0
3841 ? mapping_args.context
3842 ->tensors[mapping_args.node->inputs->data[0]]
3843 .dims->size +
3844 builtin->axis
3845 : builtin->axis;
3846 mapping_args.builder->AddScalarInt32Operand(axis);
3847 *nn_op_type = ANEURALNETWORKS_CONCATENATION;
3848 } break;
3849 case kTfLiteBuiltinDequantize: {
3850 *nn_op_type = ANEURALNETWORKS_DEQUANTIZE;
3851 } break;
3852 case kTfLiteBuiltinFloor: {
3853 *nn_op_type = ANEURALNETWORKS_FLOOR;
3854 } break;
3855 case kTfLiteBuiltinRelu: {
3856 *nn_op_type = ANEURALNETWORKS_RELU;
3857 } break;
3858 case kTfLiteBuiltinReluN1To1: {
3859 *nn_op_type = ANEURALNETWORKS_RELU1;
3860 } break;
3861 case kTfLiteBuiltinRelu6: {
3862 *nn_op_type = ANEURALNETWORKS_RELU6;
3863 } break;
3864 case kTfLiteBuiltinLogistic: {
3865 *nn_op_type = ANEURALNETWORKS_LOGISTIC;
3866 } break;
3867 case kTfLiteBuiltinTanh: {
3868 *nn_op_type = ANEURALNETWORKS_TANH;
3869 } break;
3870 case kTfLiteBuiltinSub: {
3871 auto builtin =
3872 reinterpret_cast<TfLiteSubParams*>(mapping_args.node->builtin_data);
3873 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3874 *nn_op_type = ANEURALNETWORKS_SUB;
3875 } break;
3876 case kTfLiteBuiltinDiv: {
3877 auto builtin =
3878 reinterpret_cast<TfLiteDivParams*>(mapping_args.node->builtin_data);
3879 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3880 *nn_op_type = ANEURALNETWORKS_DIV;
3881 } break;
3882 case kTfLiteBuiltinPad:
3883 case kTfLiteBuiltinPadv2: {
3884 // We want to map to PAD as much as possible since it is more widely
3885       // supported. We map to PadV2 only when the padding value needs to
3886       // be specified explicitly.
3887 if (mapping_args.node->inputs->size == 2) {
3888 *nn_op_type = ANEURALNETWORKS_PAD;
3889 } else {
3890 const int constant_value_id = mapping_args.node->inputs->data[2];
3891 if (constant_value_id == kTfLiteOptionalTensor) {
3892 *nn_op_type = ANEURALNETWORKS_PAD;
3893 } else {
3894 *nn_op_type = ANEURALNETWORKS_PAD_V2;
3895 }
3896 }
3897 } break;
3898 case kTfLiteBuiltinUnidirectionalSequenceRnn: {
3899 auto builtin = reinterpret_cast<TfLiteSequenceRNNParams*>(
3900 mapping_args.node->builtin_data);
3901 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3902 mapping_args.builder->AddScalarInt32Operand(builtin->time_major);
3903 *nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN;
3904 } break;
3905 case kTfLiteBuiltinSpaceToBatchNd: {
3906 *nn_op_type = ANEURALNETWORKS_SPACE_TO_BATCH_ND;
3907 } break;
3908 case kTfLiteBuiltinBatchToSpaceNd: {
3909 *nn_op_type = ANEURALNETWORKS_BATCH_TO_SPACE_ND;
3910 } break;
3911 case kTfLiteBuiltinStridedSlice: {
3912 auto builtin = reinterpret_cast<TfLiteStridedSliceParams*>(
3913 mapping_args.node->builtin_data);
3914 mapping_args.builder->AddScalarInt32Operand(builtin->begin_mask);
3915 mapping_args.builder->AddScalarInt32Operand(builtin->end_mask);
3916 mapping_args.builder->AddScalarInt32Operand(builtin->shrink_axis_mask);
3917 *nn_op_type = ANEURALNETWORKS_STRIDED_SLICE;
3918 } break;
3919 case kTfLiteBuiltinTranspose: {
3920 *nn_op_type = ANEURALNETWORKS_TRANSPOSE;
3921 } break;
3922 case kTfLiteBuiltinAbs: {
3923 *nn_op_type = ANEURALNETWORKS_ABS;
3924 } break;
3925 case kTfLiteBuiltinExp: {
3926 *nn_op_type = ANEURALNETWORKS_EXP;
3927 } break;
3928 case kTfLiteBuiltinLog: {
3929 *nn_op_type = ANEURALNETWORKS_LOG;
3930 } break;
3931 case kTfLiteBuiltinRsqrt: {
3932 *nn_op_type = ANEURALNETWORKS_RSQRT;
3933 } break;
3934 case kTfLiteBuiltinPow: {
3935 *nn_op_type = ANEURALNETWORKS_POW;
3936 } break;
3937 case kTfLiteBuiltinSlice: {
3938 *nn_op_type = ANEURALNETWORKS_SLICE;
3939 } break;
3940 case kTfLiteBuiltinSin: {
3941 *nn_op_type = ANEURALNETWORKS_SIN;
3942 } break;
3943 case kTfLiteBuiltinTransposeConv: {
3944 int input_tensor_flags = 0;
3945 const int input_tensor_id =
3946 mapping_args.node->inputs->data[/*kDataInputTensor*/ 2];
3947 const int weight_tensor_id =
3948 mapping_args.node->inputs->data[/*kWeightsTensor*/ 1];
3949
3950       // Transpose convolution doesn't have a hybrid variant.
3951 const bool hybrid_op = false;
3952
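      // NNAPI 1.3 added native support for signed asymmetric int8 tensors;
      // on older versions the input is instead converted to uint8.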
3953 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
3954 mapping_args.builder->AddTensorInput(
3955 input_tensor_id, hybrid_op,
3956 input_tensor_flags | NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED);
3957
3958 } else {
3959 mapping_args.builder->AddTensorInput(
3960 input_tensor_id, hybrid_op,
3961 input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION);
3962 }
3963       // Transpose convolution uses per-channel quantization with int8 inputs
3964 // even if the number of channels in quantization parameters is equal to 1
3965 // (as opposed to conv2d, which uses per-tensor quantization in this
3966 // case).
3967 mapping_args.builder->AddTensorInput(
3968 weight_tensor_id, hybrid_op,
3969 input_tensor_flags | NN_TENSOR_FLAG_FORCE_PER_CHANNEL);
3970
3971 const bool is_bias_present =
3972 mapping_args.node->inputs->size == 4 &&
3973 mapping_args.node->inputs->data[/*kBiasTensor*/ 3] !=
3974 kTfLiteOptionalTensor;
3975
3976 if (is_bias_present) {
3977 mapping_args.builder->AddTensorInput(
3978 mapping_args.node->inputs->data[/*kBiasTensor*/ 3], hybrid_op);
3979 } else {
3980 const TfLiteTensor& output_shape =
3981 mapping_args.context->tensors[mapping_args.node->inputs
3982 ->data[/*kOutputShapeTensor*/ 0]];
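        // The output shape tensor is in NHWC order, so index 3 holds the
        // output channel count used to size the synthesized zero bias.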
3983 const int output_depth = output_shape.data.i32[3];
3984 add_zero_bias(input_tensor_id, weight_tensor_id, output_depth);
3985 }
3986 mapping_args.builder->AddTensorInput(
3987 mapping_args.node->inputs->data[/*kOutputShapeTensor*/ 0], hybrid_op);
3988
3989 auto builtin = reinterpret_cast<TfLiteTransposeConvParams*>(
3990 mapping_args.node->builtin_data);
3991 mapping_args.builder->AddScalarInt32Operand(builtin->padding);
3992 mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
3993 mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
3994 mapping_args.builder->AddScalarInt32Operand(
3995 /*ANEURALNETWORKS_FUSED_NONE*/ 0);
3996 // Use NHWC layout for input and output.
3997 mapping_args.builder->AddScalarBoolOperand(false);
3998 *nn_op_type = ANEURALNETWORKS_TRANSPOSE_CONV;
3999 } break;
4000 case kTfLiteBuiltinSqrt: {
4001 *nn_op_type = ANEURALNETWORKS_SQRT;
4002 } break;
4003 case kTfLiteBuiltinRnn: {
4004       // NNAPI needs both state_in and state_out.
4005 int ann_index;
4006 mapping_args.builder->AddStateFloat32Tensor(
4007 mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4],
4008 &ann_index);
4009 mapping_args.model_state_outputs->push_back(ann_index);
4010 mapping_args.model_state_tfl_inputs->push_back(
4011 mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4]);
4012 auto builtin =
4013 reinterpret_cast<TfLiteRNNParams*>(mapping_args.node->builtin_data);
4014 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
4015 *nn_op_type = ANEURALNETWORKS_RNN;
4016 } break;
4017 case kTfLiteBuiltinSpaceToDepth: {
4018 auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(
4019 mapping_args.node->builtin_data);
4020 mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
4021 *nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
4022 } break;
4023 case kTfLiteBuiltinSvdf: {
4024      // NNAPI needs both state_in and state_out.
4025 int ann_index;
4026 mapping_args.builder->AddStateFloat32Tensor(
4027 mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4],
4028 &ann_index);
4029 mapping_args.model_state_outputs->push_back(ann_index);
4030 mapping_args.model_state_tfl_inputs->push_back(
4031 mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4]);
4032
4033 auto builtin =
4034 reinterpret_cast<TfLiteSVDFParams*>(mapping_args.node->builtin_data);
4035 mapping_args.builder->AddScalarInt32Operand(builtin->rank);
4036 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
4037 *nn_op_type = ANEURALNETWORKS_SVDF;
4038 } break;
4039 case kTfLiteBuiltinLstm: {
4040 if (isLstmBasicKernel(mapping_args.node)) {
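// The quantized "basic" LSTM kernel is lowered to
// ANEURALNETWORKS_QUANTIZED_16BIT_LSTM, which takes per-gate weights and
// biases as separate operands, so the fused TFLite weight and bias tensors
// are decomposed into 8 weight and 4 bias constants below.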
4041 const auto output_dims =
4042 mapping_args.context->tensors[mapping_args.node->outputs->data[1]]
4043 .dims;
4044
4045 // Inputs kInputData
4046 mapping_args.builder->AddTensorInput(
4047 mapping_args.node->inputs->data[0 /* kInputData */],
4048 /* hybrid_op */ false,
4049 /* scalar_as_tensor */ false);
4050
4051        // The 8 weight tensors are set by decomposing the
4052        // kInputWeights param.
4053 const auto weight_tensor =
4054 mapping_args.context->tensors[mapping_args.node->inputs
4055 ->data[2 /* kInputWeights */]];
4056
4057 std::vector<uint8_t> recurrent_to_input;
4058 std::vector<uint8_t> input_to_input;
4059 std::vector<uint8_t> recurrent_to_cell;
4060 std::vector<uint8_t> input_to_cell;
4061 std::vector<uint8_t> recurrent_to_forget;
4062 std::vector<uint8_t> input_to_forget;
4063 std::vector<uint8_t> recurrent_to_output;
4064 std::vector<uint8_t> input_to_output;
4065 tflite::delegate::nnapi::DecomposeQuantLstmWeightsTensor(
4066 weight_tensor.data.uint8, weight_tensor.dims, &recurrent_to_input,
4067 &input_to_input, &recurrent_to_cell, &input_to_cell,
4068 &recurrent_to_forget, &input_to_forget, &recurrent_to_output,
4069 &input_to_output);
4070
4071 TfLiteIntArray* recurrent_weight_dims = TfLiteIntArrayCreate(2);
4072 TfLiteIntArray* input_weight_dims = TfLiteIntArrayCreate(2);
4073 tflite::delegate::nnapi::SetWeightSubmatrixDims(
4074 weight_tensor.dims, recurrent_weight_dims, input_weight_dims);
4075
4076 int new_tensor_index = -1;
4077
4078 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4079 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4080 input_weight_dims, input_to_input, weight_tensor.params,
4081 &new_tensor_index);
4082
4083 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4084 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4085 input_weight_dims, input_to_forget, weight_tensor.params,
4086 &new_tensor_index);
4087
4088 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4089 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4090 input_weight_dims, input_to_cell, weight_tensor.params,
4091 &new_tensor_index);
4092
4093 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4094 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4095 input_weight_dims, input_to_output, weight_tensor.params,
4096 &new_tensor_index);
4097
4098 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4099 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4100 recurrent_weight_dims, recurrent_to_input, weight_tensor.params,
4101 &new_tensor_index);
4102
4103 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4104 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4105 recurrent_weight_dims, recurrent_to_forget, weight_tensor.params,
4106 &new_tensor_index);
4107
4108 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4109 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4110 recurrent_weight_dims, recurrent_to_cell, weight_tensor.params,
4111 &new_tensor_index);
4112
4113 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
4114 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4115 recurrent_weight_dims, recurrent_to_output, weight_tensor.params,
4116 &new_tensor_index);
4117
4118 TfLiteIntArrayFree(input_weight_dims);
4119 TfLiteIntArrayFree(recurrent_weight_dims);
4120
4121 // Biases have to be split in four.
4122 const auto bias_size = output_dims->data[1];
4123 const TfLiteTensor& biases_tensor =
4124 mapping_args.context->tensors[mapping_args.node->inputs
4125 ->data[3 /* kInputBiases */]];
4126
4127 std::vector<int32_t> input_bias;
4128 std::vector<int32_t> cell_bias;
4129 std::vector<int32_t> forget_bias;
4130 std::vector<int32_t> output_bias;
4131 delegate::nnapi::DecomposeBiasTensor(biases_tensor.data.i32, bias_size,
4132 &input_bias, &cell_bias,
4133 &forget_bias, &output_bias);
4134
4135 int input_bias_tensor = -1;
4136 mapping_args.builder->AddNewInputConstantTensor<int32_t>(
4137 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, input_bias,
4138 biases_tensor.params, &input_bias_tensor);
4139 int forget_bias_tensor = -1;
4140 mapping_args.builder->AddNewInputConstantTensor(
4141 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
4142 forget_bias, biases_tensor.params, &forget_bias_tensor);
4143 int cell_gate_bias_tensor = -1;
4144 mapping_args.builder->AddNewInputConstantTensor(
4145 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, cell_bias,
4146 biases_tensor.params, &cell_gate_bias_tensor);
4147 int output_gate_bias_tensor = -1;
4148 mapping_args.builder->AddNewInputConstantTensor(
4149 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
4150 output_bias, biases_tensor.params, &output_gate_bias_tensor);
4151
4152 mapping_args.builder->AddTensorInput(
4153 mapping_args.node->inputs->data[4 /* kInputPrevState */],
4154 /* hybrid_op */ false,
4155 /* scalar_as_tensor */ false);
4156
4157 // kInputPrevActivation
4158 mapping_args.builder->AddTensorInput(
4159 mapping_args.node->inputs->data[1 /* kInputPrevActivation */],
4160 /* hybrid_op */ false,
4161 /* scalar_as_tensor */ false);
4162
4163        // Configure the copy from the activation and state outputs
4164        // to their associated inputs.
4165 mapping_args.feedback_loops->push_back(std::make_tuple(
4166 mapping_args.node->outputs->data[0 /*kOutputActivation*/],
4167 mapping_args.node->inputs->data[1 /*kInputPrevActivation*/]));
4168
4169 mapping_args.feedback_loops->push_back(std::make_tuple(
4170 mapping_args.node->outputs->data[1 /*kOutputState*/],
4171 mapping_args.node->inputs->data[4 /*kInputPrevState*/]));
4172
4173 // OUTPUTS
4174        // Set only the first two since the remaining ones are
4175        // ignored by NNAPI.
4176 mapping_args.builder->AddTensorOutput(
4177 mapping_args.node->outputs->data[1 /* kOutputState */], 0);
4178
4179 mapping_args.builder->AddTensorOutput(
4180 mapping_args.node->outputs->data[0 /* kOutputActivation */], 0);
4181
4182 *nn_op_type = ANEURALNETWORKS_QUANTIZED_16BIT_LSTM;
4183 } else {
4184 auto builtin = reinterpret_cast<TfLiteLSTMParams*>(
4185 mapping_args.node->builtin_data);
4186 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
4187 mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
4188 mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
4189
4190        // The current NNAPI implementation requires the scratch_buffer as an
4191        // output.
4192 mapping_args.builder->AddAdditionalFloat32OutputTensor(2);
4193
4194        // NNAPI needs both state_in and state_out for cell_state and
4195 // output_state.
4196 int ann_index;
4197 mapping_args.builder->AddStateFloat32Tensor(
4198 mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 18],
4199 &ann_index);
4200 mapping_args.model_state_outputs->push_back(ann_index);
4201 mapping_args.model_state_tfl_inputs->push_back(
4202 mapping_args.node->inputs
4203 ->data[/*kInputActivationStateTensor*/ 18]);
4204 mapping_args.builder->AddStateFloat32Tensor(
4205 mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19],
4206 &ann_index);
4207 mapping_args.model_state_outputs->push_back(ann_index);
4208 mapping_args.model_state_tfl_inputs->push_back(
4209 mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19]);
4210
4211 const bool hybrid_op = IsHybridOperator(
4212 mapping_args.context, kTfLiteBuiltinLstm, mapping_args.node);
4213
4214 if (mapping_args.node->inputs->size == 24) {
4215 for (int i = 20; i < 24; ++i) {
4216 const auto input_index = mapping_args.node->inputs->data[i];
4217 if (input_index != kTfLiteOptionalTensor) {
4218 mapping_args.builder->AddTensorInput(input_index, hybrid_op);
4219 } else {
4220 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
4221 }
4222 }
4223 }
4224
4225 *nn_op_type = ANEURALNETWORKS_LSTM;
4226 }
4227 } break;
4228 case kTfLiteBuiltinMean: {
4229 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
4230 mapping_args.node->builtin_data);
4231 int32_t keep_dims = 0;
4232 if (builtin->keep_dims) keep_dims = 1;
4233 mapping_args.builder->AddScalarInt32Operand(keep_dims);
4234 *nn_op_type = ANEURALNETWORKS_MEAN;
4235 } break;
4236 case kTfLiteBuiltinEmbeddingLookup: {
4237 *nn_op_type = ANEURALNETWORKS_EMBEDDING_LOOKUP;
4238 } break;
4239 case kTfLiteBuiltinHashtableLookup: {
4240 *nn_op_type = ANEURALNETWORKS_HASHTABLE_LOOKUP;
4241 } break;
4242 case kTfLiteBuiltinMaximum: {
4243 *nn_op_type = ANEURALNETWORKS_MAXIMUM;
4244 } break;
4245 case kTfLiteBuiltinMinimum: {
4246 *nn_op_type = ANEURALNETWORKS_MINIMUM;
4247 } break;
4248 case kTfLiteBuiltinCast: {
4249 *nn_op_type = ANEURALNETWORKS_CAST;
4250 } break;
4251 case kTfLiteBuiltinLeakyRelu: {
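// NNAPI has no dedicated LEAKY_RELU operation, so the op is lowered to PRELU
// with a constant single-element alpha tensor. For quantized inputs the alpha
// value is encoded via the tensor scale (stored value 1 with scale == alpha).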
4252 const auto input_type =
4253 mapping_args.context->tensors[mapping_args.node->inputs->data[0]]
4254 .type;
4255 auto builtin = reinterpret_cast<TfLiteLeakyReluParams*>(
4256 mapping_args.node->builtin_data);
4257
4258 TfLiteTensor alpha_tensor;
4259 alpha_tensor.type = input_type;
4260 alpha_tensor.allocation_type = kTfLiteDynamic;
4261 alpha_tensor.dims = TfLiteIntArrayCreate(1);
4262 alpha_tensor.dims->data[0] = 1;
4263 alpha_tensor.params.zero_point = 0;
4264
4265 int new_tensor_index = -1;
4266 if (input_type == kTfLiteFloat32) {
4267 alpha_tensor.params.scale = 0;
4268 std::vector<float> alpha_value = {builtin->alpha};
4269 mapping_args.builder->AddNewInputConstantTensor(
4270 ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, alpha_tensor.dims,
4271 alpha_value, alpha_tensor.params, &new_tensor_index);
4272 } else if (input_type == kTfLiteInt8 &&
4273 android_sdk_version >= kMinSdkVersionForNNAPI13) {
4274 alpha_tensor.params.scale = builtin->alpha;
4275 std::vector<int8_t> alpha_value = {1};
4276 mapping_args.builder->AddNewInputConstantTensor(
4277 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, kTfLiteInt8,
4278 alpha_tensor.dims, alpha_value, alpha_tensor.params,
4279 &new_tensor_index);
4280 } else {
4281 alpha_tensor.params.scale = builtin->alpha;
4282 std::vector<uint8_t> alpha_value = {1};
4283 mapping_args.builder->AddNewInputConstantTensor(
4284 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
4285 alpha_tensor.dims, alpha_value, alpha_tensor.params,
4286 &new_tensor_index);
4287 }
4288
4289 *nn_op_type = ANEURALNETWORKS_PRELU;
4290 } break;
4291 case kTfLiteBuiltinPrelu: {
4292 *nn_op_type = ANEURALNETWORKS_PRELU;
4293 } break;
4294 case kTfLiteBuiltinTile: {
4295 *nn_op_type = ANEURALNETWORKS_TILE;
4296 } break;
4297 case kTfLiteBuiltinLogicalOr: {
4298 *nn_op_type = ANEURALNETWORKS_LOGICAL_OR;
4299 } break;
4300 case kTfLiteBuiltinLogicalAnd: {
4301 *nn_op_type = ANEURALNETWORKS_LOGICAL_AND;
4302 } break;
4303 case kTfLiteBuiltinLogicalNot: {
4304 *nn_op_type = ANEURALNETWORKS_LOGICAL_NOT;
4305 } break;
4306 case kTfLiteBuiltinLess: {
4307 *nn_op_type = ANEURALNETWORKS_LESS;
4308 } break;
4309 case kTfLiteBuiltinLessEqual: {
4310 *nn_op_type = ANEURALNETWORKS_LESS_EQUAL;
4311 } break;
4312 case kTfLiteBuiltinGreater: {
4313 *nn_op_type = ANEURALNETWORKS_GREATER;
4314 } break;
4315 case kTfLiteBuiltinGreaterEqual: {
4316 *nn_op_type = ANEURALNETWORKS_GREATER_EQUAL;
4317 } break;
4318 case kTfLiteBuiltinEqual: {
4319 *nn_op_type = ANEURALNETWORKS_EQUAL;
4320 } break;
4321 case kTfLiteBuiltinNotEqual: {
4322 *nn_op_type = ANEURALNETWORKS_NOT_EQUAL;
4323 } break;
4324 case kTfLiteBuiltinNeg: {
4325 *nn_op_type = ANEURALNETWORKS_NEG;
4326 } break;
4327 case kTfLiteBuiltinTopkV2: {
4328 const TfLiteTensor& k_param =
4329 mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
4330 mapping_args.builder->AddScalarInt32Operand(*k_param.data.i32);
4331 *nn_op_type = ANEURALNETWORKS_TOPK_V2;
4332 } break;
4333 case kTfLiteBuiltinSelect: {
4334 *nn_op_type = ANEURALNETWORKS_SELECT;
4335 } break;
4336 case kTfLiteBuiltinGather: {
4337 auto builtin = reinterpret_cast<TfLiteGatherParams*>(
4338 mapping_args.node->builtin_data);
4339 mapping_args.builder->AddScalarInt32Operand(builtin->axis);
4340 mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[1],
4341 /* hybrid_op */ false,
4342 /* tensor_flags */ 0);
4343 *nn_op_type = ANEURALNETWORKS_GATHER;
4344 } break;
4345 case kTfLiteBuiltinBidirectionalSequenceLstm: {
4346 auto builtin = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
4347 mapping_args.node->builtin_data);
4348 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
4349 mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
4350 mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
4351 mapping_args.builder->AddScalarBoolOperand(builtin->merge_outputs);
4352 mapping_args.builder->AddScalarBoolOperand(builtin->time_major);
4353 // TF Lite doesn't support layer normalization in bidirectional
4354 // sequence LSTM, so we insert optional tensors for NNAPI.
4355 for (int i = 0; i < 8; ++i) {
4356 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
4357 }
4358 *nn_op_type = ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM;
4359 } break;
4360 case kTfLiteBuiltinExpandDims: {
4361 const TfLiteTensor& axis_param =
4362 mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
4363 mapping_args.builder->AddScalarInt32Operand(*axis_param.data.i32);
4364 *nn_op_type = ANEURALNETWORKS_EXPAND_DIMS;
4365 } break;
4366 case kTfLiteBuiltinSplit: {
4367 const TfLiteTensor& axis =
4368 mapping_args.context->tensors[mapping_args.node->inputs->data[0]];
4369 auto builtin =
4370 reinterpret_cast<TfLiteSplitParams*>(mapping_args.node->builtin_data);
4371 mapping_args.builder->AddScalarInt32Operand(*axis.data.i32);
4372 mapping_args.builder->AddScalarInt32Operand(builtin->num_splits);
4373 *nn_op_type = ANEURALNETWORKS_SPLIT;
4374 } break;
4375 case kTfLiteBuiltinLogSoftmax: {
4376      // Scale and axis are hardcoded to 1 and -1, respectively,
4377      // in TFLite.
4378 mapping_args.builder->AddScalarFloat32Operand(1);
4379 mapping_args.builder->AddScalarInt32Operand(-1);
4380 *nn_op_type = ANEURALNETWORKS_LOG_SOFTMAX;
4381 } break;
4382 case kTfLiteBuiltinQuantize: {
4383 auto input_index = mapping_args.node->inputs->data[0];
4384      // NNAPI only supports quantization from float, not requantization.
4385      // If the input is already quantized, dequantize it by adding a
4386      // Dequantize node before this one.
4387 if (IsQuantized(mapping_args.context->tensors[input_index].type)) {
4388 mapping_args.builder->AddDequantize(0, input_index, kTfLiteFloat32,
4389 mapping_args.node_index);
4390 }
4391
4392 *nn_op_type = ANEURALNETWORKS_QUANTIZE;
4393 } break;
4394 case kTfLiteBuiltinReduceAny: {
4395 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
4396 mapping_args.node->builtin_data);
4397 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
4398 *nn_op_type = ANEURALNETWORKS_REDUCE_ANY;
4399 } break;
4400 case kTfLiteBuiltinReduceMin: {
4401 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
4402 mapping_args.node->builtin_data);
4403 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
4404 *nn_op_type = ANEURALNETWORKS_REDUCE_MIN;
4405 } break;
4406 case kTfLiteBuiltinReduceMax: {
4407 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
4408 mapping_args.node->builtin_data);
4409 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
4410 *nn_op_type = ANEURALNETWORKS_REDUCE_MAX;
4411 } break;
4412 case kTfLiteBuiltinDepthToSpace: {
4413 auto builtin = reinterpret_cast<TfLiteDepthToSpaceParams*>(
4414 mapping_args.node->builtin_data);
4415 mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
4416 *nn_op_type = ANEURALNETWORKS_DEPTH_TO_SPACE;
4417 } break;
4418 case kTfLiteBuiltinReduceProd: {
4419 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
4420 mapping_args.node->builtin_data);
4421 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
4422 *nn_op_type = ANEURALNETWORKS_REDUCE_PROD;
4423 } break;
4424 case kTfLiteBuiltinSum: {
4425 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
4426 mapping_args.node->builtin_data);
4427 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
4428 *nn_op_type = ANEURALNETWORKS_REDUCE_SUM;
4429 } break;
4430 case kTfLiteBuiltinElu: {
4431 mapping_args.builder->AddScalarFloat32Operand(1.0);
4432 *nn_op_type = ANEURALNETWORKS_ELU;
4433 } break;
4434 case kTfLiteBuiltinFill: {
4435 *nn_op_type = ANEURALNETWORKS_FILL;
4436 } break;
4437 case kTfLiteBuiltinBatchMatmul: {
4438 auto builtin = reinterpret_cast<TfLiteBatchMatMulParams*>(
4439 mapping_args.node->builtin_data);
4440 mapping_args.builder->AddScalarBoolOperand(builtin->adj_x);
4441 mapping_args.builder->AddScalarBoolOperand(builtin->adj_y);
4442 *nn_op_type = ANEURALNETWORKS_BATCH_MATMUL;
4443 } break;
4444 case kTfLiteBuiltinPack: {
4445 *nn_op_type = ANEURALNETWORKS_PACK;
4446 } break;
4447 case kTfLiteBuiltinMirrorPad: {
4448 auto builtin = reinterpret_cast<TfLiteMirrorPaddingParams*>(
4449 mapping_args.node->builtin_data);
4450 mapping_args.builder->AddScalarInt32Operand(builtin->mode);
4451 *nn_op_type = ANEURALNETWORKS_MIRROR_PAD;
4452 } break;
4453 case kTfLiteBuiltinReverseV2: {
4454 *nn_op_type = ANEURALNETWORKS_REVERSE;
4455 } break;
4456 default:
4457 // All other operators are not mapped.
4458 return kTfLiteError;
4459 }
4460 return kTfLiteOk;
4461 }
4462
4463 // Initialize the kernel (a NN model).
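// Collects the nodes to replace, resolves the target NNAPI devices (and
// registers SL diagnostics callbacks when the support library provides them),
// records tensor max-size hints, builds the NNAPI model graph, and derives the
// compilation cache token when serialization caching is enabled.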
4464 TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
4465 const TfLiteDelegateParams* params,
4466 int* nnapi_errno) {
4467 for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
4468 nodes_.push_back(node_index);
4469 }
4470
4471 // Initialize densify map and dequantize map.
4472 densify_output_to_node_mapping_ = std::vector<int>(context->tensors_size, -1);
4473 non_const_dequantize_output_to_node_mapping_ =
4474 std::vector<int>(context->tensors_size, -1);
4475 const auto delegate_options =
4476 StatefulNnApiDelegate::GetOptions(params->delegate);
4477 if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
4478 ShouldUseTargetDevices(delegate_options, nnapi_)) {
4479 TF_LITE_ENSURE_STATUS(GetTargetDevices(context, params->delegate, nnapi_,
4480 nnapi_errno, &nnapi_devices_));
4481
4482 if (nnapi_devices_.empty()) {
4483 TF_LITE_KERNEL_LOG(
4484 context, "NNAPI delegate requested but no accelerators available.");
4485 return kTfLiteError;
4486 }
4487
4488 if (nnapi_->SL_ANeuralNetworksDiagnostic_registerCallbacks != nullptr) {
4489 nnapi_->SL_ANeuralNetworksDiagnostic_registerCallbacks(
4490 [](const void* nnapi,
4491 const ANeuralNetworksDiagnosticCompilationInfo* info) {
4492 return LogCompilationInfoOnce(static_cast<const NnApi*>(nnapi),
4493 info);
4494 },
4495 [](const void* nnapi,
4496 const ANeuralNetworksDiagnosticExecutionInfo* info) {
4497 return LogExecutionInfoOnce(static_cast<const NnApi*>(nnapi), info);
4498 },
4499 const_cast<NnApi*>(nnapi_));
4500 TFLITE_LOG_PROD(TFLITE_LOG_INFO,
4501                      "Registered diagnostics callbacks in NNAPI SL driver "
4502 "SL_ANeuralNetworksDiagnostic_registerCallbacks.");
4503 } else {
4504 TFLITE_LOG_PROD(TFLITE_LOG_WARNING,
4505 "NNAPI SL driver did not implement "
4506 "SL_ANeuralNetworksDiagnostic_registerCallbacks!");
4507 }
4508 }
4509
4510 if (nnapi_->android_sdk_version < kMinSdkVersionForNNAPI12 &&
4511 delegate_options.allow_dynamic_dimensions &&
4512 delegate_options.vendor_plugin != nullptr) {
4513 TF_LITE_KERNEL_LOG(context,
4514                        "Models with dynamic dimensions and a vendor plugin are "
4515 "not supported before NNAPI 1.2 (API level 29).");
4516 return kTfLiteError;
4517 }
4518
4519 // Mark the handle backed tensors.
4520 tensor_memory_map_ =
4521 &StatefulNnApiDelegate::GetTensorMemoryMap(params->delegate);
4522
4523 // Initialize tensor max size hints.
4524 tensor_max_size_hints_.resize(context->tensors_size, 0);
4525 for (const auto it : delegate_options.tensor_max_size_hints) {
4526 auto tensor_index = it.first;
4527 // Skip invalid or optional entries.
4528 if (tensor_index >= context->tensors_size || tensor_index < 0) continue;
4529 // Skip tensors with static shape.
4530 if (!HasUnspecifiedDimension(&context->tensors[tensor_index])) continue;
4531 auto max_size_hint = it.second;
4532 tensor_max_size_hints_[tensor_index] = max_size_hint;
4533 }
4534
4535 if (!nn_model_) {
4536 ANeuralNetworksModel* model = nullptr;
4537 RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
4538 nnapi_->ANeuralNetworksModel_create(&model),
4539 "creating NNAPI model", nnapi_errno);
4540 nn_model_.reset(model);
4541
4542 TF_LITE_ENSURE_STATUS(BuildGraph(context, delegate_options,
4543 params->input_tensors,
4544 params->output_tensors, nnapi_errno));
4545 }
4546
4547 auto* cache = StatefulNnApiDelegate::GetCache(params->delegate);
4548 if (cache) {
4549 // Compilation caching is enabled, construct the uint8 token.
4550 uint64_t token_parts[4];
4551     // model_token is incorporated into partition_key by TFLite Serialization.
4552     // NNAPI uses a 256-bit key, but we can just tile the unique 64-bit
4553     // fingerprint from TFLite.
4554 auto partition_entry = cache->GetEntryForKernel(kNnapiId, context, params);
4555 token_parts[0] = partition_entry.GetFingerprint();
4556 token_parts[1] = partition_entry.GetFingerprint();
4557 token_parts[2] = partition_entry.GetFingerprint();
4558 token_parts[3] = partition_entry.GetFingerprint();
4559 // TODO(b/172238515): get token size from header instead of hardcoding.
4560 // Allocate one extra 'null' byte to avoid bugs with backends that might
4561 // be doing strlen() on the token ptr.
4562 std::vector<uint8_t> nnapi_cache_token(33, 0);
4563 // Copy the token bits.
4564 uint8_t* p = reinterpret_cast<uint8_t*>(token_parts);
4565 for (int i = 0; i < 4 * sizeof(uint64_t); i++) {
4566 nnapi_cache_token[i] = p[i];
4567 }
4568
4569 nn_compilation_cache_token_ = nnapi_cache_token;
4570 }
4571
4572 nn_execution_cache_.SetMaxCacheSize(
4573 delegate_options.max_execution_cache_size);
4574
4575 initialised_ = true;
4576
4577 return kTfLiteOk;
4578 }
4579
4580 TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context,
4581 TfLiteNode* node, int* nnapi_errno) {
4582 if (!initialised_) {
4583 return kTfLiteError;
4584 }
4585
4586 const auto delegate_options =
4587 StatefulNnApiDelegate::GetOptions(node->delegate);
4588 if (nn_compilation_) {
4589 return kTfLiteOk;
4590 }
4591
4592 ANeuralNetworksCompilation* compilation = nullptr;
4593 if (!nnapi_devices_.empty()) {
4594 // Compile for the selected accelerator.
4595 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4596 context,
4597 nnapi_->ANeuralNetworksCompilation_createForDevices(
4598 nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
4599 &compilation),
4600 "creating NNAPI model for given devices", nnapi_errno);
4601 } else {
4602 // Trying to call ANeuralNetworksCompilation_create when the delegate is
4603 // constructed from a support library would result in a crash.
4604 if (nnapi_->ANeuralNetworksCompilation_create != nullptr) {
4605 RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
4606 nnapi_->ANeuralNetworksCompilation_create(
4607 nn_model_.get(), &compilation),
4608 "creating NNAPI compilation",
4609 nnapi_errno);
4610 } else {
4611 TF_LITE_KERNEL_LOG(
4612 context,
4613 "Attempted to call ANeuralNetworksCompilation_create from NNAPI "
4614 "delegate that is constructed from a support library");
4615 return kTfLiteError;
4616 }
4617 }
4618
4619 auto preference = delegate_options.execution_preference;
4620 if (preference !=
4621 StatefulNnApiDelegate::Options::ExecutionPreference::kUndefined) {
4622 const int preference_result =
4623 nnapi_->ANeuralNetworksCompilation_setPreference(compilation,
4624 preference);
4625 if (preference_result != ANEURALNETWORKS_NO_ERROR) {
4626 nnapi_->ANeuralNetworksCompilation_free(compilation);
4627 compilation = nullptr;
4628 }
4629 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, preference_result,
4630 "setting compilation preferences",
4631 nnapi_errno);
4632 }
4633
4634 if (!nn_compilation_cache_token_.empty()) {
4635 const char* cache_dir = delegate_options.cache_dir;
4636 const int set_caching_result =
4637 nnapi_->ANeuralNetworksCompilation_setCaching(
4638 compilation, cache_dir, nn_compilation_cache_token_.data());
4639 if (set_caching_result != ANEURALNETWORKS_NO_ERROR) {
4640 nnapi_->ANeuralNetworksCompilation_free(compilation);
4641 compilation = nullptr;
4642 }
4643 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result,
4644 "configuring NNAPI caching", nnapi_errno);
4645 }
4646 // Set compilation timeout if applicable.
4647 if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
4648 if (delegate_options.max_compilation_timeout_duration_ns > 0) {
4649 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4650 context,
4651 nnapi_->ANeuralNetworksCompilation_setTimeout(
4652 compilation,
4653 delegate_options.max_compilation_timeout_duration_ns),
4654 "setting compilation timeout", nnapi_errno);
4655 }
4656 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4657 context,
4658 nnapi_->ANeuralNetworksCompilation_setPriority(
4659 compilation, delegate_options.execution_priority),
4660 "setting compilation priority", nnapi_errno);
4661 }
4662 if (delegate_options.vendor_compilation_hints && vendor_plugin_) {
4663 TF_LITE_ENSURE_STATUS(vendor_plugin_->ConfigureCompilationHints(
4664 delegate_options.vendor_compilation_hints, compilation));
4665 }
4666 const int finish_result =
4667 nnapi_->ANeuralNetworksCompilation_finish(compilation);
4668 if (finish_result != ANEURALNETWORKS_NO_ERROR) {
4669 nnapi_->ANeuralNetworksCompilation_free(compilation);
4670 compilation = nullptr;
4671 }
4672 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result,
4673 "completing NNAPI compilation", nnapi_errno);
4674 nn_compilation_.reset(compilation);
4675
4676 bool should_use_burst_mode = delegate_options.use_burst_computation;
4677 // Override should_use_burst_mode to true if the selected NNAPI devices are of
4678 // NNAPI feature level 5 to 7. Starting from NNAPI feature level 8, reusable
4679 // execution is preferred.
4680 if (!nnapi_devices_.empty() &&
4681 target_feature_level_ >= kNNAPIRuntimeFeatureLevel5 &&
4682 target_feature_level_ <= kNNAPIRuntimeFeatureLevel7) {
4683 should_use_burst_mode = true;
4684 }
4685 // Create burst object to be reused across a sequence of executions
4686 if (should_use_burst_mode &&
4687 nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
4688 nnapi_->ANeuralNetworksBurst_create) {
4689 ANeuralNetworksBurst* burst = nullptr;
4690 const int create_burst_result =
4691 nnapi_->ANeuralNetworksBurst_create(nn_compilation_.get(), &burst);
4692 if (create_burst_result != ANEURALNETWORKS_NO_ERROR) {
4693 nnapi_->ANeuralNetworksBurst_free(burst);
4694 burst = nullptr;
4695 }
4696 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, create_burst_result,
4697 "creating NNAPI burst", nnapi_errno);
4698 nn_burst_.reset(burst);
4699 }
4700
4701 return kTfLiteOk;
4702 }
4703
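// Asks the target NNAPI devices which of the NNAPI operations in the built
// model they support, then maps that back to TFLite node indices; a TFLite
// node is reported as supported only if every NNAPI operation it was mapped to
// is supported.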
4704 TfLiteStatus NNAPIDelegateKernel::GetOperationsSupportedByTargetNnApiDevices(
4705 TfLiteContext* context, std::vector<int>* supported_nodes,
4706 int* nnapi_errno) {
4707 if (!nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices) {
4708 return kTfLiteError;
4709 }
4710
4711 // Get the number of NNAPI operations mapped.
4712 NnapiMappingContext* mapping_context =
4713 reinterpret_cast<NnapiMappingContext*>(mapping_util_->context);
4714 const int nnapi_model_size =
4715 mapping_context->nnapi_to_tflite_op_mapping_.size();
4716
4717   // Determine the list of operations the devices actually support.
4718 std::unique_ptr<bool[]> nnapi_ops_support_flags(new bool[nnapi_model_size]);
4719
4720 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4721 context,
4722 nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices(
4723 nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
4724 nnapi_ops_support_flags.get()),
4725 "Checking supported operations for devices", nnapi_errno);
4726
4727 // A TfLite op is supported only if all the associated NNAPI ones are.
4728 auto tflite_ops_support_status = std::map<int, bool>();
4729 std::for_each(nodes_.begin(), nodes_.end(),
4730 [&tflite_ops_support_status](int tflite_node_index) {
4731 tflite_ops_support_status[tflite_node_index] = true;
4732 });
4733 for (int nnapi_op_index = 0; nnapi_op_index < nnapi_model_size;
4734 nnapi_op_index++) {
4735 const auto tflite_op_index =
4736 mapping_context->nnapi_to_tflite_op_mapping_[nnapi_op_index];
4737 tflite_ops_support_status[tflite_op_index] &=
4738 nnapi_ops_support_flags[nnapi_op_index];
4739 if (!tflite_ops_support_status[tflite_op_index]) {
4740 if (std::count(non_const_dequantize_output_to_node_mapping_.begin(),
4741 non_const_dequantize_output_to_node_mapping_.end(), -1) <
4742 non_const_dequantize_output_to_node_mapping_.size() ||
4743 std::count(densify_output_to_node_mapping_.begin(),
4744 densify_output_to_node_mapping_.end(),
4745 -1) < densify_output_to_node_mapping_.size()) {
4746 // Only allow full model delegation for sparse model.
4747 return kTfLiteOk;
4748 }
4749 }
4750 }
4751
4752 supported_nodes->clear();
4753 std::for_each(nodes_.begin(), nodes_.end(),
4754 [&supported_nodes, &tflite_ops_support_status](int node_index) {
4755 if (tflite_ops_support_status[node_index]) {
4756 supported_nodes->push_back(node_index);
4757 }
4758 });
4759
4760 return kTfLiteOk;
4761 }
4762
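// Runs one inference: creates or reuses an ANeuralNetworksExecution, binds
// inputs and outputs to the shared memory pools (or registered memory
// handles), dispatches the computation via burst, synchronous, or event-based
// APIs depending on the runtime feature level, and copies outputs, state
// tensors, and feedback-loop values back into the TFLite tensors.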
4763 TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
4764 TfLiteNode* node, int* nnapi_errno) {
4765 const bool allow_padding =
4766 nnapi_->nnapi_runtime_feature_level > kMinSdkVersionForNNAPI13 &&
4767 nnapi_->ANeuralNetworksExecution_enableInputAndOutputPadding != nullptr;
4768 const auto delegate_options =
4769 StatefulNnApiDelegate::GetOptions(node->delegate);
4770
4771 // Executions are not reusable before Android API 31.
4772 bool execution_is_reusable =
4773 nnapi_->nnapi_runtime_feature_level > kMinSdkVersionForNNAPI13 &&
4774 delegate_options.max_execution_cache_size > 0;
4775
4776 // The output dynamic dimensions cannot be inferred when using custom ops.
4777 bool can_infer_output_shape = !delegate_options.allow_dynamic_dimensions ||
4778 delegate_options.vendor_plugin == nullptr;
4779
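// When executions are reusable, look up a previously configured execution by
// signature; a new execution is created and configured below only on a cache
// miss.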
4780 ANeuralNetworksExecution* execution = nullptr;
4781 NNAPIExecutionCache::Signature signature;
4782 if (execution_is_reusable) {
4783 signature = CreateExecutionCacheSignature(context, node, delegate_options,
4784 *tensor_memory_map_);
4785 execution = nn_execution_cache_.Get(signature);
4786 }
4787 bool should_create_new_execution = execution == nullptr;
4788
4789 // Manages the lifetime of the new execution.
4790 UniqueExecution unique_execution(nullptr, NNFreeExecution(nnapi_));
4791 if (should_create_new_execution) {
4792 RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
4793 nnapi_->ANeuralNetworksExecution_create(
4794 nn_compilation_.get(), &execution),
4795 "creating NNAPI execution", nnapi_errno);
4796 unique_execution.reset(execution);
4797
4798 if (nnapi_->nnapi_runtime_feature_level > kMinSdkVersionForNNAPI13) {
4799 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4800 context,
4801 nnapi_->ANeuralNetworksExecution_setReusable(execution,
4802 /*reusable=*/true),
4803 "making execution reusable", nnapi_errno);
4804 }
4805 if (delegate_options.vendor_execution_hints && vendor_plugin_) {
4806 TF_LITE_ENSURE_STATUS(vendor_plugin_->ConfigureExecutionHints(
4807 delegate_options.vendor_execution_hints, execution));
4808 }
4809
4810 // Allow padding bytes for execution inputs & outputs if applicable.
4811 if (allow_padding) {
4812 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4813 context,
4814 nnapi_->ANeuralNetworksExecution_enableInputAndOutputPadding(
4815 execution, /*enable=*/true),
4816           "setting allow padding for execution inputs and outputs",
4817 nnapi_errno);
4818 }
4819     // Set execution timeouts if applicable.
4820 if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
4821 if (delegate_options.max_execution_timeout_duration_ns > 0) {
4822 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4823 context,
4824 nnapi_->ANeuralNetworksExecution_setTimeout(
4825 execution, delegate_options.max_execution_timeout_duration_ns),
4826 "setting execution timeout", nnapi_errno);
4827 }
4828 if (delegate_options.max_execution_loop_timeout_duration_ns > 0) {
4829 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4830 context,
4831 nnapi_->ANeuralNetworksExecution_setLoopTimeout(
4832 execution,
4833 delegate_options.max_execution_loop_timeout_duration_ns),
4834 "setting execution loop timeout", nnapi_errno);
4835 }
4836 }
4837     // Check if the input and output memory pools need to be resized.
4838 if (delegate_options.allow_dynamic_dimensions) {
4839 size_t total_input_byte_size = 0;
4840       // Map the TensorFlow Lite inputs and outputs to ann_indices.
4841 for (int i : TfLiteIntArrayView(node->inputs)) {
4842 // Constant tensors are not NNAPI inputs.
4843 if (i != kTfLiteOptionalTensor &&
4844 context->tensors[i].allocation_type != kTfLiteMmapRo &&
4845 // The delegate might not have mapped this input (this can
4846             // happen if one tensor is split into several ones).
4847 mapping_util_->TfLiteIndexToNnIndex(mapping_util_.get(), i) != -1) {
4848 if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
4849 continue;
4850 }
4851 const TfLiteType nn_type_conversion =
4852 mapping_util_->TfLiteIndexToNnTypeConversion(mapping_util_.get(),
4853 i);
4854 int tensor_size = 0;
4855 if (nn_type_conversion == kTfLiteNoType) {
4856 tensor_size = context->tensors[i].bytes;
4857 } else {
4858 size_t type_size;
4859 TF_LITE_ENSURE_OK(
4860 context,
4861 GetSizeOfType(context, nn_type_conversion, &type_size));
4862 tensor_size = NumElements(&context->tensors[i]) * type_size;
4863 }
4864 total_input_byte_size += tensor_size;
4865 total_input_byte_size += GetNumPaddingBytes(tensor_size);
4866 }
4867 }
4868 if (total_input_byte_size > nn_input_memory_->get_byte_size()) {
4869 nn_input_memory_ = std::make_unique<NNMemory>(nnapi_, "input_pool",
4870 total_input_byte_size);
4871 // Reset all cached executions when the memory pool is recreated.
4872 nn_execution_cache_.Clear();
4873 }
4874
4875 size_t total_output_byte_size = 0;
4876 for (int i : TfLiteIntArrayView(node->outputs)) {
4877 const auto& tensor = context->tensors[i];
4878 if (tensor.buffer_handle != kTfLiteNullBufferHandle) {
4879 continue;
4880 }
4881 size_t tensor_size = tensor.bytes;
4882 if (!can_infer_output_shape && HasUnspecifiedDimension(&tensor)) {
4883 if (tensor_max_size_hints_[i] == 0) {
4884 TF_LITE_KERNEL_LOG(context,
4885 "Missing max tensor size for tensor#%d. When a "
4886 "vendor plugin is supplied, max tensor size is "
4887 "required for all dynamic output tensors.",
4888 i);
4889 return kTfLiteError;
4890 }
4891 tensor_size = std::max(tensor_size, tensor_max_size_hints_[i]);
4892 }
4893 total_output_byte_size += tensor_size;
4894 total_output_byte_size += GetNumPaddingBytes(tensor_size);
4895 }
4896 if (total_output_byte_size > nn_output_memory_->get_byte_size()) {
4897 nn_output_memory_ = std::make_unique<NNMemory>(nnapi_, "output_pool",
4898 total_output_byte_size);
4899 // Reset all cached executions when the memory pool is recreated.
4900 nn_execution_cache_.Clear();
4901 }
4902 }
4903
4904 if (execution_is_reusable) {
4905       // The execution ownership is transferred to nn_execution_cache_.
4906 nn_execution_cache_.Put(signature, std::move(unique_execution));
4907 unique_execution = nullptr;
4908 }
4909 }
4910   // Set the input tensor buffers. Note: we access TFLite tensors using
4911   // absolute indices, but NNAPI indexes inputs by relative indices.
4912 int relative_input_index = 0;
4913
4914 const bool use_int8_asymm_signed =
4915 target_feature_level_ >= kMinSdkVersionForNNAPI13;
4916
4917 size_t input_offset = 0;
4918 for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
4919 if (absolute_input_index == kTfLiteOptionalTensor) {
4920 continue;
4921 }
4922 ANeuralNetworksOperandType input_nn_operand_type;
4923 ANeuralNetworksOperandType* input_nn_operand_type_ptr = nullptr;
4924 TfLiteTensor* tensor = &context->tensors[absolute_input_index];
4925 TfLiteType ann_type_equivalent =
4926 mapping_util_->TfLiteIndexToNnTypeConversion(mapping_util_.get(),
4927 absolute_input_index);
4928 if (delegate_options.allow_dynamic_dimensions &&
4929 ::tflite::HasUnspecifiedDimension(tensor)) {
4930 input_nn_operand_type = ConvertTensorTypeToNNType(
4931 tensor, ann_type_equivalent, use_int8_asymm_signed);
4932 input_nn_operand_type_ptr = &input_nn_operand_type;
4933 }
4934 if (tensor->allocation_type != kTfLiteMmapRo) {
4935 if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
4936 tensor->buffer_handle < tensor_memory_map_->size()) {
4937 if (should_create_new_execution) {
4938 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4939 context,
4940 nnapi_->ANeuralNetworksExecution_setInputFromMemory(
4941 execution, relative_input_index, input_nn_operand_type_ptr,
4942 tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
4943 tensor->bytes),
4944 "associating NNAPI execution input with a memory object", tensor,
4945 nnapi_errno);
4946 }
4947 relative_input_index++;
4948 continue;
4949 }
4950 int tensor_size = 0;
4951 int padding_bytes = 0;
4952 if (ann_type_equivalent != kTfLiteNoType) {
4953 const auto num_elements = NumElements(tensor);
4954 uint8_t* input_ptr = nn_input_memory_->get_data_ptr() + input_offset;
4955 if (tensor->type == kTfLiteUInt8 &&
4956 ann_type_equivalent == kTfLiteInt32) {
4957 for (int i = 0; i < num_elements; ++i) {
4958 reinterpret_cast<int32_t*>(input_ptr)[i] =
4959 static_cast<const int32_t>(tensor->data.uint8[i]);
4960 }
4961 } else if (tensor->type == kTfLiteInt8 &&
4962 ann_type_equivalent == kTfLiteUInt8) {
4963 // Explicitly convert int8 values to uint8 values.
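// (TENSOR_QUANT8_ASYMM is unsigned, so adding 128 maps the int8 range
// [-128, 127] onto [0, 255], e.g. -128 -> 0, 0 -> 128, 127 -> 255, with the
// zero point shifted by the same amount.)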
4964 for (int i = 0; i < num_elements; ++i) {
4965 input_ptr[i] = static_cast<const uint8_t>(
4966 static_cast<int32_t>(tensor->data.int8[i]) + 128);
4967 }
4968 } else if (tensor->type == kTfLiteInt8 &&
4969 ann_type_equivalent == kTfLiteInt32) {
4970 if (use_int8_asymm_signed) {
4971 for (int i = 0; i < num_elements; ++i) {
4972 reinterpret_cast<int32_t*>(input_ptr)[i] =
4973 static_cast<const int32_t>(tensor->data.int8[i]);
4974 }
4975 } else {
4976 for (int i = 0; i < num_elements; ++i) {
4977 reinterpret_cast<int32_t*>(input_ptr)[i] =
4978 static_cast<const int32_t>(tensor->data.int8[i]) + 128;
4979 }
4980 }
4981 } else if (tensor->type == kTfLiteInt64 &&
4982 ann_type_equivalent == kTfLiteInt32) {
4983 // Check that values fit into int32.
4984 int32_t* input_ptr_i32 = reinterpret_cast<int32_t*>(input_ptr);
4985 for (int i = 0; i < num_elements; ++i) {
4986             if (tensor->data.i64[i] < std::numeric_limits<int32_t>::min() ||
4987                 tensor->data.i64[i] > std::numeric_limits<int32_t>::max()) {
4988 TF_LITE_KERNEL_LOG(context,
4989 "NN API Delegate: int64 value out of bounds "
4990 "for int32 target NNAPI tensor\n");
4991 return kTfLiteError;
4992 }
4993 input_ptr_i32[i] = static_cast<int32_t>(tensor->data.i64[i]);
4994 }
4995 } else {
4996 TF_LITE_KERNEL_LOG(
4997 context,
4998 "NN API Delegate: unsupported tensor types conversion: "
4999 "from type code %d to type code %d.\n",
5000 tensor->type, ann_type_equivalent);
5001 return kTfLiteError;
5002 }
5003 size_t type_size;
5004 TF_LITE_ENSURE_OK(
5005 context, GetSizeOfType(context, ann_type_equivalent, &type_size));
5006 tensor_size = NumElements(tensor) * type_size;
5007 padding_bytes = GetNumPaddingBytes(tensor_size);
5008 if (should_create_new_execution) {
5009 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
5010 context,
5011 nnapi_->ANeuralNetworksExecution_setInputFromMemory(
5012 execution, relative_input_index, input_nn_operand_type_ptr,
5013 nn_input_memory_->get_handle(), input_offset,
5014 GetNNTensorSize(tensor_size, allow_padding)),
5015 "associating NNAPI execution input with a memory object", tensor,
5016 nnapi_errno);
5017 }
5018 } else if (mapping_util_->TfLiteIndexToNnIndex(
5019 mapping_util_.get(), absolute_input_index) != -1) {
5020 // copy data to pre-allocated shared memory.
5021 memcpy(nn_input_memory_->get_data_ptr() + input_offset,
5022 tensor->data.raw, tensor->bytes);
5023 tensor_size = tensor->bytes;
5024 padding_bytes = GetNumPaddingBytes(tensor_size);
5025 if (should_create_new_execution) {
5026 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
5027 context,
5028 nnapi_->ANeuralNetworksExecution_setInputFromMemory(
5029 execution, relative_input_index, input_nn_operand_type_ptr,
5030 nn_input_memory_->get_handle(), input_offset,
5031 GetNNTensorSize(tensor_size, allow_padding)),
5032 "associating NNAPI execution input with a memory object", tensor,
5033 nnapi_errno);
5034 }
5035 }
5036 input_offset += tensor_size + padding_bytes;
5037 relative_input_index++;
5038 }
5039 }
5040
5041 // Set the output tensor buffers.
5042 int relative_output_index = 0;
5043 size_t output_offset = 0;
5044 for (auto output_index : TfLiteIntArrayView(node->outputs)) {
5045     // If the NNAPI implementation doesn't have some of the outputs,
5046     // they are left unmapped and we should not try to read their values here.
5047 if (mapping_util_->TfLiteIndexToNnIndex(mapping_util_.get(),
5048 output_index) == -1) {
5049 continue;
5050 }
5051 ANeuralNetworksOperandType output_nn_operand_type;
5052 ANeuralNetworksOperandType* output_nn_operand_type_ptr = nullptr;
5053 TfLiteTensor* tensor = &context->tensors[output_index];
5054 if (delegate_options.allow_dynamic_dimensions && can_infer_output_shape &&
5055 ::tflite::HasUnspecifiedDimension(tensor)) {
5056 TfLiteType ann_type_equivalent =
5057 mapping_util_->TfLiteIndexToNnTypeConversion(mapping_util_.get(),
5058 output_index);
5059 output_nn_operand_type = ConvertTensorTypeToNNType(
5060 tensor, ann_type_equivalent, use_int8_asymm_signed);
5061 output_nn_operand_type_ptr = &output_nn_operand_type;
5062 }
5063 if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
5064 tensor->buffer_handle < tensor_memory_map_->size() &&
5065 should_create_new_execution) {
5066 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
5067 context,
5068 nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
5069 execution, relative_output_index, output_nn_operand_type_ptr,
5070 tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
5071 tensor->bytes),
5072 "associating NNAPI execution output to a memory object", tensor,
5073 nnapi_errno);
5074
5075 } else {
5076 size_t tensor_size = tensor->bytes;
5077 if (!can_infer_output_shape && HasUnspecifiedDimension(tensor)) {
5078 tensor_size =
5079 std::max(tensor->bytes, tensor_max_size_hints_[output_index]);
5080 }
5081 int padding_bytes = GetNumPaddingBytes(tensor_size);
5082 if (should_create_new_execution) {
5083 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
5084 context,
5085 nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
5086 execution, relative_output_index, output_nn_operand_type_ptr,
5087 nn_output_memory_->get_handle(), output_offset,
5088 GetNNTensorSize(tensor_size, allow_padding)),
5089 "associating NNAPI execution output to a memory object", tensor,
5090 nnapi_errno);
5091 }
5092 output_offset += tensor_size + padding_bytes;
5093 }
5094 relative_output_index++;
5095 }
5096
5097 // Set memory for NNAPI state_outputs.
5098 for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
5099 int state_tensor_idx = model_state_tfl_inputs_[i];
5100 TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
5101 int padding_bytes = GetNumPaddingBytes(tensor->bytes);
5102 if (should_create_new_execution) {
5103 RETURN_TFLITE_ERROR_IF_NN_ERROR(
5104 context,
5105 nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
5106 execution, relative_output_index, nullptr,
5107 nn_output_memory_->get_handle(), output_offset,
5108 GetNNTensorSize(tensor->bytes, allow_padding)),
5109 "associating NNAPI execution state output to a memory object",
5110 nnapi_errno);
5111 }
5112 output_offset += tensor->bytes + padding_bytes;
5113 relative_output_index++;
5114 }
5115
5116 // Invoke ANN in blocking fashion.
5117 if (nnapi_->android_sdk_version < kMinSdkVersionForNNAPI12) {
5118 ANeuralNetworksEvent* event = nullptr;
5119 RETURN_TFLITE_ERROR_IF_NN_ERROR(
5120 context,
5121 nnapi_->ANeuralNetworksExecution_startCompute(execution, &event),
5122 "starting async computation", nnapi_errno);
5123 const int wait_result = nnapi_->ANeuralNetworksEvent_wait(event);
5124 nnapi_->ANeuralNetworksEvent_free(event);
5125 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, wait_result,
5126 "waiting for async computation completion",
5127 nnapi_errno);
5128 } else {
5129 // Use Burst mode by default for NNAPI 1.2+.
5130 if (nn_burst_) {
5131 RETURN_TFLITE_ERROR_IF_NN_ERROR(
5132 context,
5133 nnapi_->ANeuralNetworksExecution_burstCompute(execution,
5134 nn_burst_.get()),
5135 "running burst computation", nnapi_errno);
5136 } else {
5137 // Use synchronous execution for NNAPI 1.2+ as a fallback.
5138 RETURN_TFLITE_ERROR_IF_NN_ERROR(
5139 context, nnapi_->ANeuralNetworksExecution_compute(execution),
5140 "running computation", nnapi_errno);
5141 }
5142 }
5143
5144   // Resize dynamic output tensors.
5145 if (!can_infer_output_shape) {
5146 relative_output_index = 0;
5147 for (auto output_index : TfLiteIntArrayView(node->outputs)) {
5148 TfLiteTensor* tensor = &context->tensors[output_index];
5149 if (HasUnspecifiedDimension(tensor)) {
5150 auto* new_dims = TfLiteIntArrayCreate(tensor->dims->size);
5151 RETURN_TFLITE_ERROR_IF_NN_ERROR(
5152 context,
5153 nnapi_->ANeuralNetworksExecution_getOutputOperandDimensions(
5154 execution, relative_output_index,
5155 reinterpret_cast<uint32_t*>(new_dims->data)),
5156 "get output operand dimensions", nnapi_errno);
5157 TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, tensor, new_dims));
5158 }
5159 relative_output_index++;
5160 }
5161 }
5162
5163   // Copy results from shared memory to the destination.
5164 output_offset = 0;
5165 for (auto output_index : TfLiteIntArrayView(node->outputs)) {
5166 TfLiteTensor* tensor = &context->tensors[output_index];
5167 if (tensor->buffer_handle != kTfLiteNullBufferHandle) {
5168 continue;
5169 }
5170 TfLiteType ann_type_equivalent =
5171 mapping_util_->TfLiteIndexToNnTypeConversion(mapping_util_.get(),
5172 output_index);
5173 if (tensor->type == kTfLiteInt8 && ann_type_equivalent == kTfLiteUInt8) {
5174 // Explicitly convert uint8 values to int8 values.
5175 uint8_t* output_ptr = reinterpret_cast<uint8_t*>(
5176 nn_output_memory_->get_data_ptr() + output_offset);
5177 const auto num_elements = NumElements(tensor);
5178 for (int i = 0; i < num_elements; ++i) {
5179 output_ptr[i] =
5180 static_cast<uint8_t>(static_cast<int32_t>(output_ptr[i]) - 128);
5181 }
5182 }
5183 memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset,
5184 tensor->bytes);
5185 size_t tensor_size = tensor->bytes;
5186 if (!can_infer_output_shape && HasUnspecifiedDimension(tensor)) {
5187 tensor_size =
5188 std::max(tensor->bytes, tensor_max_size_hints_[output_index]);
5189 }
5190 output_offset += tensor_size;
5191 output_offset += GetNumPaddingBytes(tensor_size);
5192 }
5193   // The state_out of the previous invocation needs to be copied to the
5194   // state_in of the current invocation.
5195 for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
5196 int state_tensor_idx = model_state_tfl_inputs_[i];
5197 TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
5198 memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset,
5199 tensor->bytes);
5200 output_offset += tensor->bytes;
5201 output_offset += GetNumPaddingBytes(tensor->bytes);
5202 }
5203
5204   // Copy the output of all output tensors in feedback_loops_ into the
5205   // associated inputs.
5206 for (auto feedback_loop : feedback_loops_) {
5207 int output_tensor_idx;
5208 int input_tensor_idx;
5209 std::tie(output_tensor_idx, input_tensor_idx) = feedback_loop;
5210 TfLiteTensor& src = context->tensors[output_tensor_idx];
5211 TfLiteTensor& dest = context->tensors[input_tensor_idx];
5212
5213 memcpy(dest.data.raw, src.data.raw, src.bytes);
5214 }
5215
5216 return kTfLiteOk;
5217 }
5218
5219 void NNAPIDelegateKernel::AddDequantizeOperatorsWhereNeeded(
5220 const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
5221 int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno) {
5222   // Depending on the operator and the input data format, Dequantize
5223   // operators may need to be added. For example, when the input is
5224   // floating-point but the weights are quantized, the weights will first be
5225   // dequantized to the same format as the input before being passed to the
5226   // operator.
5227
5228 // The tensor determining whether the inputs should be floating-point.
5229 int input_tensor_index = -1;
5230 std::vector<int> inputs_to_potentially_dequantize;
5231
5232 switch (builtin_code) {
5233 case kTfLiteBuiltinConv2d:
5234 case kTfLiteBuiltinFullyConnected: {
5235 input_tensor_index = 0;
5236 // Weights and bias are inputs #1 and #2 respectively and may require
5237 // dequantization.
5238 inputs_to_potentially_dequantize = {1, 2};
5239 break;
5240 }
5241 case kTfLiteBuiltinLstm: {
5242 input_tensor_index = 0;
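// All weight, peephole, bias, projection, and layer-normalization inputs may
// need dequantization; the state inputs (18 and 19) and the data input (0) are
// excluded.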
5243 inputs_to_potentially_dequantize = {1, 2, 3, 4, 5, 6, 7,
5244 8, 9, 10, 11, 12, 13, 14,
5245 15, 16, 17, 20, 21, 22, 23};
5246 break;
5247 }
5248 default:
5249 return;
5250 }
5251
5252 int tensor_id = node->inputs->data[input_tensor_index];
5253 if (tensor_id < 0) return;
5254
5255 // Nothing to do if the input is not floating-point.
5256 if (!IsFloat(context->tensors[tensor_id].type)) return;
5257
5258 for (int i : inputs_to_potentially_dequantize) {
5259 if (i < 0 || i >= node->inputs->size) continue; // Ignore invalid index.
5260 tensor_id = node->inputs->data[i];
5261 if (tensor_id < 0) continue; // Ignore optional input.
5262
5263 const TfLiteType type = context->tensors[tensor_id].type;
5264 // Nothing to do for this tensor if it's not quantized.
5265 if (!IsQuantized(type)) continue;
5266
5267 // Insert Dequantize operator if it hasn't been done already and change
5268 // the node's input accordingly.
5269 builder->AddDequantize(i, node->inputs->data[i], type, tflite_node_index);
5270 }
5271 }
5272
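// Converts the sparse constant weight feeding the given DENSIFY node into a
// dense constant NNAPI input. For fp16 weights, the densified data is
// additionally dequantized to fp32 when should_dequantize is set.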
5273 TfLiteStatus NNAPIDelegateKernel::DensifyAndDequantizeConstTensor(
5274 TfLiteContext* context, int densify_node_id, bool should_dequantize,
5275 NNAPIOpBuilder& builder) {
5276 TfLiteNode* densify_node;
5277 TfLiteRegistration* reg;
5278 TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
5279 context, densify_node_id, &densify_node, ®));
5280 int sparse_weight_tid = densify_node->inputs->data[0];
5281 auto input_tensor = context->tensors[sparse_weight_tid];
5282 auto output_tensor = context->tensors[densify_node->outputs->data[0]];
5283 if (input_tensor.sparsity == nullptr) {
5284 return kTfLiteError;
5285 }
5286 const int dims_count = output_tensor.dims->size;
5287 std::vector<int> vector_shape(dims_count);
5288 for (int i = 0; i < dims_count; i++) {
5289 vector_shape[i] = output_tensor.dims->data[i];
5290 }
5291 size_t dense_size;
5292 int new_tensor_index = -1;
5293 switch (input_tensor.type) {
5294 case kTfLiteFloat32: {
5295 dense_size = output_tensor.bytes / sizeof(float);
5296 std::vector<float> output_data(dense_size);
5297 tflite::internal::sparsity::FormatConverter<float> converter(
5298 vector_shape, *input_tensor.sparsity);
5299 converter.SparseToDense(static_cast<const float*>(input_tensor.data.data),
5300 dense_size, output_data.data(), context);
5301 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<float>(
5302 ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, output_tensor.dims,
5303 output_data, output_tensor.params, &new_tensor_index));
5304 break;
5305 }
5306 case kTfLiteFloat16: {
5307 dense_size = output_tensor.bytes / sizeof(Eigen::half);
5308 std::vector<uint16_t> output_data(dense_size);
5309 Eigen::half* unpacked_fp16_data =
5310 reinterpret_cast<Eigen::half*>(output_data.data());
5311 tflite::internal::sparsity::FormatConverter<Eigen::half> converter(
5312 vector_shape, *input_tensor.sparsity);
5313 converter.SparseToDense(
5314 static_cast<const Eigen::half*>(input_tensor.data.data), dense_size,
5315 unpacked_fp16_data, context);
5316 if (should_dequantize) {
5317 // we need to dequantize the fp16 dense tensor
5318 std::vector<float> float_dense_data(dense_size);
5319 for (int i = 0; i < dense_size; ++i) {
5320 float_dense_data[i] = fp16_ieee_to_fp32_value(
5321 reinterpret_cast<uint16_t*>(output_data.data())[i]);
5322 }
5323 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<float>(
5324 ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, output_tensor.dims,
5325 float_dense_data, output_tensor.params, &new_tensor_index));
5326 } else {
5327 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<uint16_t>(
5328 ANEURALNETWORKS_TENSOR_FLOAT16, kTfLiteFloat16, output_tensor.dims,
5329 output_data, output_tensor.params, &new_tensor_index));
5330 }
5331 break;
5332 }
5333 case kTfLiteInt8: {
5334 dense_size = output_tensor.bytes / sizeof(int8_t);
5335 std::vector<int8_t> output_data(dense_size);
5336 tflite::internal::sparsity::FormatConverter<int8_t> converter(
5337 vector_shape, *input_tensor.sparsity);
5338 converter.SparseToDense(
5339 static_cast<const int8_t*>(input_tensor.data.data), dense_size,
5340 output_data.data(), context);
5341 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<int8_t>(
5342 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, kTfLiteInt8,
5343 output_tensor.dims, output_data, output_tensor.params,
5344 &new_tensor_index));
5345 break;
5346 }
5347 default: {
5348 return kTfLiteError;
5349 }
5350 }
5351 return kTfLiteOk;
5352 }
5353
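// Returns a new array of new_size elements: entries are copied from old_array
// (which is freed), and any additional entries are initialized to init_value.
// A minimal usage sketch (hypothetical call site):
//   dims = ResizeTfLiteIntArray(dims, /*new_size=*/4, /*init_value=*/1);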
5354 TfLiteIntArray* ResizeTfLiteIntArray(TfLiteIntArray* old_array, int new_size,
5355 int init_value) {
5356 TfLiteIntArray* ret = TfLiteIntArrayCreate(new_size);
5357 if (ret) {
5358 int size_to_copy = 0;
5359 if (old_array) {
5360 size_to_copy = new_size > old_array->size ? old_array->size : new_size;
5361 memcpy(ret->data, old_array->data, size_to_copy * sizeof(int));
5362 }
5363 for (int i = size_to_copy; i < ret->size; i++) {
5364 ret->data[i] = init_value;
5365 }
5366 }
5367 TfLiteIntArrayFree(old_array);
5368 return ret;
5369 }
5370
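// Custom deleter for the mapping util: deletes the owned NnapiMappingContext
// and frees the malloc-allocated C interface struct.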
5371 void NNFreeMappingUtil::operator()(NnapiMappingUtilCInterface* mapping_util) {
5372 NnapiMappingContext* mapping_context =
5373 reinterpret_cast<NnapiMappingContext*>(mapping_util->context);
5374 delete (mapping_context);
5375 mapping_util->context = nullptr;
5376 free(mapping_util);
5377 }
5378
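// Static implementations backing the NnapiMappingUtilCInterface function
// pointers. They operate on the NnapiMappingContext stored in the interface's
// context field, tracking the TFLite-tensor-index to NNAPI-operand-index
// mapping and any per-tensor type conversions.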
5379 class NnapiMappingUtilCInterfaceImpl {
5380 public:
5381   static int TfLiteIndexToNnIndex(NnapiMappingUtilCInterface* mapping,
5382 int index) {
5383 NnapiMappingContext* mapping_context =
5384 reinterpret_cast<NnapiMappingContext*>(mapping->context);
5385 const size_t max_size = mapping_context->lite_tensor_to_ann_tensor_.size();
5386 if (index >= 0 && index < max_size)
5387 return mapping_context->lite_tensor_to_ann_tensor_[index];
5388 else
5389 return -1;
5390 }
5391
5392   static int AddNewNonTensorOperand(NnapiMappingUtilCInterface* mapping) {
5393 NnapiMappingContext* mapping_context =
5394 reinterpret_cast<NnapiMappingContext*>(mapping->context);
5395 return mapping_context->next_ann_tensor_index_++;
5396 }
5397
5398   static int AddDelegateGeneratedInputAnnTensorOperand(
5399 NnapiMappingUtilCInterface* mapping) {
5400 NnapiMappingContext* mapping_context =
5401 reinterpret_cast<NnapiMappingContext*>(mapping->context);
5402 return mapping_context->next_ann_tensor_index_++;
5403 }
5404
5405   static int AddNewNnTensorIndex(NnapiMappingUtilCInterface* mapping,
5406 int tflite_index) {
5407 NnapiMappingContext* mapping_context =
5408 reinterpret_cast<NnapiMappingContext*>(mapping->context);
5409 const size_t current_size =
5410 mapping_context->lite_tensor_to_ann_tensor_.size();
5411 if (tflite_index >= current_size) {
5412 mapping_context->lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
5413 }
5414 const int new_tensor_index = mapping_context->next_ann_tensor_index_++;
5415 mapping_context->lite_tensor_to_ann_tensor_[tflite_index] =
5416 new_tensor_index;
5417 return new_tensor_index;
5418 }
5419
5420   static TfLiteType TfLiteIndexToNnTypeConversion(
5421 NnapiMappingUtilCInterface* mapping, int index) {
5422 NnapiMappingContext* mapping_context =
5423 reinterpret_cast<NnapiMappingContext*>(mapping->context);
5424 const size_t max_size = mapping_context->index_to_type_conversion_.size();
5425 if (index >= 0 && index < max_size)
5426 return static_cast<TfLiteType>(
5427 mapping_context->index_to_type_conversion_[index]);
5428 else
5429 return kTfLiteNoType;
5430 }
5431
5432   static void AddTypeConversion(NnapiMappingUtilCInterface* mapping,
5433 int tflite_index, TfLiteType tflite_type) {
5434 NnapiMappingContext* mapping_context =
5435 reinterpret_cast<NnapiMappingContext*>(mapping->context);
5436 const size_t current_size =
5437 mapping_context->index_to_type_conversion_.size();
5438 if (tflite_index >= current_size) {
5439 mapping_context->index_to_type_conversion_.resize(tflite_index + 1,
5440 kTfLiteNoType);
5441 }
5442 mapping_context->index_to_type_conversion_[tflite_index] = tflite_type;
5443 }
5444
5445   static void AddNnapiToTfliteOpMapping(NnapiMappingUtilCInterface* mapping,
5446 int tflite_node_index) {
5447 NnapiMappingContext* mapping_context =
5448 reinterpret_cast<NnapiMappingContext*>(mapping->context);
5449 mapping_context->nnapi_to_tflite_op_mapping_.push_back(tflite_node_index);
5450 }
5451 };
5452
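// Allocates a NnapiMappingUtilCInterface, attaches a fresh
// NnapiMappingContext, and wires the callback table to the static
// implementations above.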
5453 NnapiMappingUtilCInterface*
5454 NNAPIDelegateKernel::NnapiMappingUtilCInterfaceCreate() {
5455 NnapiMappingUtilCInterface* mapping =
5456 static_cast<NnapiMappingUtilCInterface*>(
5457 malloc(sizeof(NnapiMappingUtilCInterface)));
5458 mapping->context = new NnapiMappingContext();
5459 mapping->TfLiteIndexToNnIndex =
5460 NnapiMappingUtilCInterfaceImpl::TfLiteIndexToNnIndex;
5461 mapping->AddNewNonTensorOperand =
5462 NnapiMappingUtilCInterfaceImpl::AddNewNonTensorOperand;
5463 mapping->AddDelegateGeneratedInputAnnTensorOperand =
5464 NnapiMappingUtilCInterfaceImpl::AddDelegateGeneratedInputAnnTensorOperand;
5465 mapping->AddNewNnTensorIndex =
5466 NnapiMappingUtilCInterfaceImpl::AddNewNnTensorIndex;
5467 mapping->TfLiteIndexToNnTypeConversion =
5468 NnapiMappingUtilCInterfaceImpl::TfLiteIndexToNnTypeConversion;
5469 mapping->AddTypeConversion =
5470 NnapiMappingUtilCInterfaceImpl::AddTypeConversion;
5471 mapping->AddNnapiToTfliteOpMapping =
5472 NnapiMappingUtilCInterfaceImpl::AddNnapiToTfliteOpMapping;
5473 return mapping;
5474 }
5475
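// Translates the TFLite nodes assigned to this kernel into NNAPI operands and
// operations. A first pass records constant fp16 dequantization and densify
// bookkeeping; each remaining node is then either lowered into supported
// NNAPI ops, mapped through a vendor plugin, or added via the generic Map()
// path.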
5476 TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
5477 TfLiteContext* context, int* nnapi_errno, bool allow_dynamic_dimensions) {
5478 DequantizeMapping dequantize_mapping;
5479 // The operand builder allows creating a single op. It is created outside
5480 // the for loop to avoid reallocating the vectors.
5481 NNAPIOpBuilder builder(nnapi_, context, mapping_util_.get(),
5482 &dequantize_mapping, &allocation_memory_mapping_,
5483 nn_model_.get(), nnapi_errno,
5484 allow_dynamic_dimensions);
5485   // If we have target accelerators, the target SDK version might be
5486   // different from the current Android version.
5487 target_feature_level_ = nnapi_->nnapi_runtime_feature_level;
5488 if (!nnapi_devices_.empty()) {
5489 TF_LITE_ENSURE_STATUS(GetTargetFeatureLevel(
5490 context, nnapi_, nnapi_devices_, &target_feature_level_, nnapi_errno));
5491 }
5492   // First pass: handle const fp16->fp32 dequantize and densify if needed.
5493 for (auto node_index : nodes_) {
5494 TfLiteNode* node = nullptr;
5495 TfLiteRegistration* registration = nullptr;
5496 TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
5497         context, node_index, &node, &registration));
5498 if (IsDequantizeConstFloat16(context, node, registration)) {
5499 builder.AddTensorInput(node->inputs->data[0], /*hybrid_op=*/false,
5500 NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION |
5501 NN_TENSOR_FLAG_SCALAR_AS_TENSOR);
5502 }
5503 if (IsDensifyConstTensor(context, node, registration)) {
5504 densify_output_to_node_mapping_[node->outputs->data[0]] = node_index;
5505 }
5506 if (IsDequantizeNonConstFloat16(context, node, registration)) {
5507 non_const_dequantize_output_to_node_mapping_[node->outputs->data[0]] =
5508 node_index;
5509 }
5510 }
5511 // Clear the input and output lists for the dequantize path.
5512 builder.ClearInputOuputLists();
5513
5514 // Add other tensors.
5515 for (auto node_index : nodes_) {
5516 // Obtain the op and registration.
5517 TfLiteNode* node;
5518 TfLiteRegistration* reg;
5519 TF_LITE_ENSURE_STATUS(
5520         context->GetNodeAndRegistration(context, node_index, &node, &reg));
5521 // skip DENSIFY -> DEQUANTIZE as they are handled elsewhere.
5522 if (IsDensifyConstTensor(context, node, reg) ||
5523 IsDequantizeNonConstFloat16(context, node, reg)) {
5524 continue;
5525 }
5526
5527 // Use vendor plugin to map the node if needed.
5528 if (vendor_plugin_ && vendor_plugin_->ValidateNode(context, reg, node)) {
5529 TF_LITE_ENSURE_STATUS(vendor_plugin_->MapNode(
5530 context, node, node_index, mapping_util_.get(), nn_model_.get()));
5531 continue;
5532 }
5533 // Delegate PACK by lowering it into CONCAT + RESHAPE.
5534 if (reg->builtin_code == kTfLiteBuiltinPack &&
5535 target_feature_level_ < kNNAPIRuntimeFeatureLevel6) {
5536 TF_LITE_ENSURE_STATUS(
5537 builder.TransformPackIntoSupportedOps(node_index, node, reg));
5538 continue;
5539 }
5540 // Delegate UNPACK by lowering it into RESHAPE + SPLIT.
5541 if (reg->builtin_code == kTfLiteBuiltinUnpack) {
5542 TF_LITE_ENSURE_STATUS(
5543 builder.TransformUnpackIntoSupportedOps(node_index, node, reg));
5544 continue;
5545 }
5546 // Delegate SPLIT_V by lowering it into SLICEs.
5547 if (reg->builtin_code == kTfLiteBuiltinSplitV) {
5548 TF_LITE_ENSURE_STATUS(
5549 builder.TransformSplitVIntoSupportedOps(node_index, node, reg));
5550 continue;
5551 }
5552 // Delegate SQUARED_DIFFERENCE by lowering it into SUB + MUL.
5553 if (reg->builtin_code == kTfLiteBuiltinSquaredDifference) {
5554 TF_LITE_ENSURE_STATUS(builder.TransformSquaredDifferenceIntoSupportedOps(
5555 node_index, node, reg));
5556 continue;
5557 }
5558 // Fully quantized full LSTM.
5559 if (target_feature_level_ >= kMinSdkVersionForNNAPI13 &&
5560 reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
5561 context->tensors[node->inputs->data[0]].type == kTfLiteInt8) {
5562 const auto quant8_full_lstm_op_code = ANEURALNETWORKS_QUANTIZED_LSTM;
5563
5564 constexpr int kInputTensor = 0;
5565 constexpr int kInputToInputWeightsTensor = 1;
5566 constexpr int kRecurrentToInputWeightsTensor = 5;
5567 constexpr int kInputGateBiasTensor = 12;
5568 constexpr int kForgetGateBiasTensor = 13;
5569 constexpr int kCellGateBiasTensor = 14;
5570 constexpr int kOutputGateBiasTensor = 15;
5571 constexpr int kProjectionWeightsTensor = 16;
5572 constexpr int kProjectionBiasTensor = 17;
5573 constexpr int kPrevOutputTensor = 18;
5574
5575 // Add input tensors.
5576 for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
5577 const auto input_index = node->inputs->data[input_pos];
5578 if (input_index == kTfLiteOptionalTensor) {
5579 if (input_pos == kInputToInputWeightsTensor ||
5580 input_pos == kRecurrentToInputWeightsTensor ||
5581 input_pos == kProjectionWeightsTensor) {
5582 TF_LITE_ENSURE_STATUS(builder.AddVectorInt8Operand(nullptr, 0));
5583 } else if (input_pos == kInputGateBiasTensor ||
5584 input_pos == kForgetGateBiasTensor ||
5585 input_pos == kCellGateBiasTensor ||
5586 input_pos == kOutputGateBiasTensor ||
5587 input_pos == kProjectionBiasTensor) {
5588 TF_LITE_ENSURE_STATUS(builder.AddVectorInt32Operand(nullptr, 0));
5589 } else { // cell-to-* and layer norm weights.
5590 TF_LITE_ENSURE_STATUS(builder.AddVectorInt16Operand(nullptr, 0));
5591 }
5592 } else {
5593 // Only input and previous output use INT8_ASYM_SIGNED.
5594 int flags =
5595 (input_pos == kInputTensor || input_pos == kPrevOutputTensor)
5596 ? NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED
5597 : 0;
5598 TF_LITE_ENSURE_STATUS(
5599 builder.AddTensorInput(input_index, /*hybrid_op=*/false, flags));
5600 }
5601 }
5602
5603 // Add clip parameters.
5604 auto builtin = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
5605 TF_LITE_ENSURE_STATUS(
5606 builder.AddScalarFloat32Operand(builtin->cell_clip));
5607 TF_LITE_ENSURE_STATUS(
5608 builder.AddScalarFloat32Operand(builtin->proj_clip));
5609
5610 // Add quantization parameters for intermediate tensors.
5611 TF_LITE_ENSURE_EQ(context, node->intermediates->size, 5);
5612 for (int intermediate_pos = 0;
5613 intermediate_pos < node->intermediates->size; ++intermediate_pos) {
5614 const auto intermediate_index =
5615 node->intermediates->data[intermediate_pos];
5616 const TfLiteTensor& tensor = context->tensors[intermediate_index];
5617 TfLiteAffineQuantization* quantization_params =
5618 static_cast<TfLiteAffineQuantization*>(tensor.quantization.params);
5619 if (intermediate_pos == 4) {
5620 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
5621 quantization_params->zero_point->data[0]));
5622 }
5623 TF_LITE_ENSURE_STATUS(builder.AddScalarFloat32Operand(
5624 quantization_params->scale->data[0]));
5625 }
5626
5627 // Activation state output.
5628 int ann_index;
5629 builder.AddStateInt8AsymTensor(
5630 node->inputs->data[/*kInputActivationStateTensor*/ 18], &ann_index);
5631 model_state_outputs_.push_back(ann_index);
5632 model_state_tfl_inputs_.push_back(
5633 node->inputs->data[/*kInputActivationStateTensor*/ 18]);
5634
5635 // Cell state output.
5636 builder.AddStateInt16Tensor(
5637 node->inputs->data[/*kInputCellStateTensor*/ 19], &ann_index);
5638 model_state_outputs_.push_back(ann_index);
5639 model_state_tfl_inputs_.push_back(
5640 node->inputs->data[/*kInputCellStateTensor*/ 19]);
5641
5642 // Add output tensors.
5643 for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
5644 const auto output_index = node->outputs->data[output_pos];
5645 TF_LITE_ENSURE_STATUS(builder.AddTensorOutput(
5646 output_index, NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
5647 }
5648
5649 builder.FinalizeAddOperation(quant8_full_lstm_op_code, node_index);
5650 continue;
5651 }
5652
5653 const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
5654 const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code);
5655 const bool need_int8_conversion =
5656 target_feature_level_ < kMinSdkVersionForNNAPI13 &&
5657 NeedInt8Conversion(context, reg->builtin_code, node);
5658 const bool use_int8_asymm_signed =
5659 target_feature_level_ >= kMinSdkVersionForNNAPI13 && !hybrid_op;
5660
5661 // skip DEQUANTIZE (fp16 -> fp32) as it is handled elsewhere
5662 if (IsDequantizeConstFloat16(context, node, reg)) {
5663 continue;
5664 }
5665
5666 int input_tensor_flags = 0;
5667 if (scalar_as_tensor) {
5668 input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
5669 }
5670 if (use_int8_asymm_signed) {
5671 input_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
5672 }
5673
5674 // On SDK level less than 30, h_swish will be lowered into supported NNAPI
5675 // operations. Since SDK level 30, h_swish is supported as a single
5676 // operation.
5677 if (reg->builtin_code == kTfLiteBuiltinHardSwish &&
5678 nnapi_->android_sdk_version < kMinSdkVersionForNNAPI13) {
5679 builder.TransformHardSwishIntoSupportedOps(
5680 node->inputs->data[0], node->outputs->data[0], need_int8_conversion,
5681 node_index);
5682 continue;
5683 }
5684 // For PACK, NNAPI expects the axis scalar before all input tensors.
5685 if (reg->builtin_code == kTfLiteBuiltinPack) {
5686 const auto* builtin =
5687 reinterpret_cast<TfLitePackParams*>(node->builtin_data);
5688 // NNAPI only accepts non-negative axis.
5689 auto& input_tensor = context->tensors[node->inputs->data[0]];
5690 int axis = builtin->axis < 0 ? input_tensor.dims->size + builtin->axis + 1
5691 : builtin->axis;
5692 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(axis));
5693 }
5694 // Map inputs to NN API tensor indices.
5695 for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
5696 if (node->inputs->data[input_pos] != kTfLiteOptionalTensor &&
5697 context->tensors[node->inputs->data[input_pos]].type ==
5698 kTfLiteFloat16 &&
5699 IsConstantTensor(&context->tensors[node->inputs->data[input_pos]])) {
5700 input_tensor_flags |= NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION;
5701 }
5702 if (reg->builtin_code == kTfLiteBuiltinTransposeConv) {
5703 // Everything is added during Map since input tensors
5704 // have different order.
5705 continue;
5706 }
5707 if (reg->builtin_code == kTfLiteBuiltinFullyConnected &&
5708 node->inputs->data[input_pos] == kTfLiteOptionalTensor) {
5709 // skip optional bias and handle it during mapping
5710 continue;
5711 }
5712 const auto input_index = node->inputs->data[input_pos];
5713 // handle sparse weights for Conv2d
5714 if (reg->builtin_code == kTfLiteBuiltinConv2d && input_pos == 1) {
5715 int densify_node_id = -1;
5716 bool should_dequantize = false;
5717 int dequantize_node_id =
5718 non_const_dequantize_output_to_node_mapping_[input_index];
5719 if (dequantize_node_id != -1) {
5720 should_dequantize = true;
5721 // Find densify->dequantize pattern.
5722 TfLiteNode* dequant_node;
5723 TfLiteRegistration* reg;
5724 TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
5725             context, dequantize_node_id, &dequant_node, &reg));
5726 densify_node_id =
5727 densify_output_to_node_mapping_[dequant_node->inputs->data[0]];
5728 } else {
5729 densify_node_id = densify_output_to_node_mapping_[input_index];
5730 }
5731 if (densify_node_id != -1) {
5732 TF_LITE_ENSURE_STATUS(DensifyAndDequantizeConstTensor(
5733 context, densify_node_id, should_dequantize, builder));
5734 continue;
5735 }
5736 }
5737 if (need_int8_conversion &&
5738 (input_pos == 0 ||
5739 reg->builtin_code == kTfLiteBuiltinFullyConnected ||
5740 reg->builtin_code == kTfLiteBuiltinConv2d ||
5741 reg->builtin_code == kTfLiteBuiltinDepthwiseConv2d ||
5742 reg->builtin_code == kTfLiteBuiltinAdd ||
5743 reg->builtin_code == kTfLiteBuiltinMul ||
5744 reg->builtin_code == kTfLiteBuiltinSub ||
5745 reg->builtin_code == kTfLiteBuiltinConcatenation ||
5746 reg->builtin_code == kTfLiteBuiltinMaximum ||
5747 reg->builtin_code == kTfLiteBuiltinMinimum ||
5748 reg->builtin_code == kTfLiteBuiltinLeakyRelu ||
5749 reg->builtin_code == kTfLiteBuiltinLess ||
5750 reg->builtin_code == kTfLiteBuiltinLessEqual ||
5751 reg->builtin_code == kTfLiteBuiltinPrelu ||
5752 reg->builtin_code == kTfLiteBuiltinGreater ||
5753 reg->builtin_code == kTfLiteBuiltinGreaterEqual ||
5754 reg->builtin_code == kTfLiteBuiltinEqual ||
5755 reg->builtin_code == kTfLiteBuiltinNotEqual ||
5756 reg->builtin_code == kTfLiteBuiltinSelect)) {
5757 // Only selected inputs require int8 conversion.
5758 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(
5759 input_index, hybrid_op,
5760 input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION));
5761 continue;
5762 }
5763 if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
5764 input_pos >= 20) {
5765 // Skip layer normalization weights. They are added in the Map
5766 // function (after all the other inputs added there) since layer
5767 // normalization weights are the last four inputs of the LSTM op in
5768 // NNAPI.
5769 continue;
5770 }
5771 if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
5772 // Configuring all inputs in the Map function
5773 continue;
5774 }
5775 if (reg->builtin_code == kTfLiteBuiltinUnidirectionalSequenceLstm) {
5776 if (input_pos >= 20) {
5777 // Skip layer normalization weights. They are added in the Map
5778 // function (after all the other inputs added there) since layer
5779 // normalization weights are the last four inputs of the
5780 // unidirectional sequence LSTM op in NNAPI.
5781 continue;
5782 }
5783 if (input_index == kTfLiteOptionalTensor) {
5784 TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
5785 continue;
5786 }
5787 }
5788 if ((reg->builtin_code == kTfLiteBuiltinSplit) &&
5789 (input_index == node->inputs->data[0])) {
5790 // Skip the axis input tensor; it will be added as a scalar operand
5791 // by the Map() mapping.
5792 continue;
5793 }
5794
5795 // Pad and Padv2 have an optional parameter for a pad value which has
5796 // to be converted to a scalar type in NN API.
5797 if ((reg->builtin_code == kTfLiteBuiltinPadv2 ||
5798 reg->builtin_code == kTfLiteBuiltinPad) &&
5799 node->inputs->size == 3 && input_pos == 2) {
5800 const int constant_value_id = node->inputs->data[2];
5801 if (constant_value_id == kTfLiteOptionalTensor) {
5802 continue;
5803 }
5804 const TfLiteTensor constant_value = context->tensors[constant_value_id];
5805
5806 switch (constant_value.type) {
5807 case kTfLiteFloat32:
5808 if (constant_value.allocation_type == kTfLiteMmapRo) {
5809 builder.AddScalarFloat32Operand(*constant_value.data.f);
5810 } else {
5811 builder.AddSingleValueTensorAsScalarOperand(
5812 constant_value_id, ANEURALNETWORKS_FLOAT32);
5813 }
5814 break;
5815 case kTfLiteUInt8:
5816 if (constant_value.allocation_type == kTfLiteMmapRo) {
5817 builder.AddScalarInt32Operand(
5818 static_cast<int32_t>(*constant_value.data.uint8));
5819 } else {
5820 builder.AddSingleValueTensorAsScalarOperand(
5821 constant_value_id, ANEURALNETWORKS_INT32);
5822 }
5823 break;
5824 case kTfLiteInt8:
5825 if (constant_value.allocation_type == kTfLiteMmapRo) {
5826 if (need_int8_conversion) {
5827 builder.AddScalarInt32Operand(
5828 static_cast<int32_t>(*constant_value.data.int8) + 128);
5829 } else {
5830 builder.AddScalarInt32Operand(*constant_value.data.int8);
5831 }
5832 } else {
5833 builder.AddSingleValueTensorAsScalarOperand(
5834 constant_value_id, ANEURALNETWORKS_INT32);
5835 }
5836 break;
5837 default:
5838 TF_LITE_KERNEL_LOG(context,
5839 "Unsupported type of pad value for pad_v2\n");
5840 return kTfLiteError;
5841 }
5842 continue;
5843 }
5844
5845 if (input_index == kTfLiteOptionalTensor &&
5846 (reg->builtin_code == kTfLiteBuiltinLstm ||
5847 reg->builtin_code == kTfLiteBuiltinSvdf ||
5848 reg->builtin_code == kTfLiteBuiltinBidirectionalSequenceLstm)) {
5849         // Properly handle the optional tensor for LSTM and SVDF.
5850         // Currently only float32 is supported.
5851 TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
5852 } else if (reg->builtin_code == kTfLiteBuiltinResizeBilinear ||
5853 reg->builtin_code == kTfLiteBuiltinResizeNearestNeighbor) {
5854 if (input_pos == 0) {
5855 // Only the first input tensor is added. The second one,
5856 // specifying the output height and width, is not added and
5857 // instead the height and width will be added individually as
5858 // scalars by the mapping function returned by Map().
5859 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5860 input_tensor_flags));
5861 }
5862 } else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
5863         // The K parameter tensor is not handled here but by the functor
5864         // returned by Map; the input tensor is instead added in
5865         // the else clause below.
5866 continue;
5867 } else if (reg->builtin_code == kTfLiteBuiltinGather) {
5868 // Everything else is added during Map since input tensors
5869 // have different order.
5870 if (input_pos == 0) {
5871 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5872 input_tensor_flags));
5873 }
5874 continue;
5875 } else if (reg->builtin_code == kTfLiteBuiltinExpandDims &&
5876 input_pos == 1) {
5877 // The axis param is added during Map
5878 continue;
5879 } else if (reg->builtin_code == kTfLiteBuiltinBatchToSpaceNd &&
5880 input_pos == 2) {
5881 // NNAPI does not support crops.
5882 // The Map function will check if all crops are zero.
5883 continue;
5884 } else if (reg->builtin_code == kTfLiteBuiltinArgMin ||
5885 reg->builtin_code == kTfLiteBuiltinArgMax) {
5886 // The first input tensor is added as is. The second one, specifying
5887 // the axis, needs to be converted to a scalar since TFLite uses a
5888 // tensor but NNAPI uses a scalar as the axis.
5889 if (input_pos == 0) {
5890 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5891 input_tensor_flags));
5892 } else {
5893 const int axis_id = node->inputs->data[1];
5894 const TfLiteTensor& axis_tensor = context->tensors[axis_id];
5895 switch (axis_tensor.type) {
5896 case kTfLiteInt32:
5897 if (axis_tensor.allocation_type == kTfLiteMmapRo) {
5898 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
5899 static_cast<int32_t>(*axis_tensor.data.i32)));
5900 } else {
5901 TF_LITE_ENSURE_STATUS(
5902 builder.AddSingleValueTensorAsScalarOperand(
5903 axis_id, ANEURALNETWORKS_INT32));
5904 }
5905 break;
5906 case kTfLiteInt64:
5907 // Map() function already makes sure int64 input is constant.
5908 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
5909 static_cast<int32_t>(*axis_tensor.data.i64)));
5910 break;
5911 default:
5912 return kTfLiteError;
5913 }
5914 }
5915 } else if (reg->builtin_code == kTfLiteBuiltinMaximum ||
5916 reg->builtin_code == kTfLiteBuiltinMinimum) {
5917 const TfLiteTensor& operand_tensor =
5918 context->tensors[node->inputs->data[input_pos]];
5919 if (operand_tensor.dims->size == 0) {
5920 int tensor_index;
5921
5922 TF_LITE_ENSURE_EQ(context, operand_tensor.allocation_type,
5923 kTfLiteMmapRo);
5924 switch (operand_tensor.type) {
5925 case kTfLiteFloat32:
5926 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5927 ANEURALNETWORKS_TENSOR_FLOAT32, operand_tensor.type, {1},
5928 std::vector<float>(1, operand_tensor.data.f[0]),
5929 operand_tensor.params, &tensor_index));
5930 break;
5931 case kTfLiteUInt8:
5932 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5933 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type, {1},
5934 std::vector<uint8_t>(1, operand_tensor.data.uint8[0]),
5935 operand_tensor.params, &tensor_index));
5936 break;
5937 case kTfLiteInt8: {
5938 auto params = operand_tensor.params;
5939 if (params.scale == 0.0) {
5940 params.scale = 1.0;
5941 }
5942
5943 if (use_int8_asymm_signed) {
5944 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5945 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,
5946 operand_tensor.type, {1},
5947 std::vector<int8_t>(1, operand_tensor.data.int8[0]), params,
5948 &tensor_index));
5949 } else {
5950 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5951 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type,
5952 {1},
5953 std::vector<int8_t>(1, operand_tensor.data.int8[0] + 128),
5954 params, &tensor_index));
5955 }
5956 } break;
5957 case kTfLiteInt32:
5958 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5959 ANEURALNETWORKS_TENSOR_INT32, operand_tensor.type, {1},
5960 std::vector<int32_t>(1, operand_tensor.data.i32[0]),
5961 operand_tensor.params, &tensor_index));
5962 break;
5963 default:
5964 return kTfLiteError;
5965 }
5966 } else {
5967 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5968 input_tensor_flags));
5969 }
5970 } else if ((reg->builtin_code == kTfLiteBuiltinReduceAny ||
5971 reg->builtin_code == kTfLiteBuiltinReduceMax ||
5972 reg->builtin_code == kTfLiteBuiltinReduceMin ||
5973 reg->builtin_code == kTfLiteBuiltinReduceProd ||
5974 reg->builtin_code == kTfLiteBuiltinSum ||
5975 reg->builtin_code == kTfLiteBuiltinMean) &&
5976 (input_pos == 1)) {
5977                  // The axis needs to be converted to a tensor if specified as a scalar.
5978 const TfLiteTensor& axis_tensor =
5979 context->tensors[node->inputs->data[input_pos]];
5980 if (axis_tensor.dims->size == 0) {
5981 TF_LITE_ENSURE_STATUS(
5982 builder.AddVectorInt32Operand(axis_tensor.data.i32, 1));
5983 } else {
5984 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5985 input_tensor_flags));
5986 }
5987 } else if (reg->builtin_code == kTfLiteBuiltinFill) {
5988 if (input_pos == 0) {
5989 const int dims_id = node->inputs->data[0];
5990 const TfLiteTensor& dims_tensor = context->tensors[dims_id];
5991 switch (dims_tensor.type) {
5992 case kTfLiteInt32:
5993 TF_LITE_ENSURE_STATUS(
5994 builder.AddTensorInput(input_index, hybrid_op));
5995 break;
5996 case kTfLiteInt64: {
5997 // We made sure that dimensions are constant and fit into int32
5998 // in Map(), so we can safely create a new tensor with casted
5999 // values.
6000 const int dims_size = dims_tensor.dims->data[0];
6001 std::vector<int32_t> dims_int32(dims_size);
6002 std::copy(dims_tensor.data.i64, dims_tensor.data.i64 + dims_size,
6003 dims_int32.begin());
6004 int new_tensor_index = -1;
6005 builder.AddNewInputConstantTensor(
6006 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, dims_tensor.dims,
6007 dims_int32, dims_tensor.params, &new_tensor_index);
6008 } break;
6009 default:
6010 return kTfLiteError;
6011 }
6012 } else {
6013 const int value_id = node->inputs->data[1];
6014 const TfLiteTensor& value_tensor = context->tensors[value_id];
6015 switch (value_tensor.type) {
6016 case kTfLiteFloat32:
6017 if (value_tensor.allocation_type == kTfLiteMmapRo) {
6018 TF_LITE_ENSURE_STATUS(
6019 builder.AddScalarFloat32Operand(*value_tensor.data.f));
6020 } else {
6021 TF_LITE_ENSURE_STATUS(
6022 builder.AddSingleValueTensorAsScalarOperand(
6023 value_id, ANEURALNETWORKS_FLOAT32));
6024 }
6025 break;
6026 case kTfLiteInt32:
6027 if (value_tensor.allocation_type == kTfLiteMmapRo) {
6028 TF_LITE_ENSURE_STATUS(
6029 builder.AddScalarInt32Operand(*value_tensor.data.i32));
6030 } else {
6031 TF_LITE_ENSURE_STATUS(
6032 builder.AddSingleValueTensorAsScalarOperand(
6033 value_id, ANEURALNETWORKS_INT32));
6034 }
6035 break;
6036 case kTfLiteInt64:
6037 if (value_tensor.allocation_type == kTfLiteMmapRo) {
6038 // Map() function already makes sure const int64 input fits into
6039 // int32.
6040 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
6041 static_cast<int32_t>(*value_tensor.data.i64)));
6042 } else {
6043 TF_LITE_ENSURE_STATUS(
6044 builder.AddSingleValueTensorAsScalarOperand(
6045 value_id, ANEURALNETWORKS_INT32));
6046 }
6047 break;
6048 default:
6049 return kTfLiteError;
6050 }
6051 }
6052 } else {
6053 TF_LITE_ENSURE_STATUS(
6054 builder.AddTensorInput(input_index, hybrid_op, input_tensor_flags));
6055 }
6056 }
6057
6058     // Get the op type and operands.
6059     // This fails if the Validate function failed.
6060 int nn_op_type;
6061 TF_LITE_ENSURE_STATUS(
6062 Map(context, reg->builtin_code, reg->version, target_feature_level_,
6063 {context, &builder, node, node_index, &model_state_outputs_,
6064 &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
6065 &nn_op_type));
6066
6067 // Map outputs to NN API tensor indices.
6068 int output_tensor_flags = 0;
6069 if (need_int8_conversion) {
6070 output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
6071 }
6072 if (use_int8_asymm_signed) {
6073 output_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
6074 }
6075     // fc_nn_intermediate_output_index is used to indicate whether an
6076     // additional RESHAPE op is needed.
6077 int fc_nn_intermediate_output_index = -1;
6078 // mean_nn_intermediate_output_index is used to indicate whether additional
6079 // re-quantization is needed.
6080 int mean_nn_intermediate_output_index = -1;
6081 for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
6082 auto output_index = node->outputs->data[output_pos];
6083
6084       // Outputs for the basic LSTM cell are set in the Map function.
6085 if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
6086 continue;
6087 }
6088 // Handle FC with keep_num_dims==true.
6089 if (reg->builtin_code == kTfLiteBuiltinFullyConnected &&
6090 reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data)
6091 ->keep_num_dims) {
6092 auto& output_tensor = context->tensors[output_index];
6093
6094 int num_units = output_tensor.dims->data[output_tensor.dims->size - 1];
6095 std::vector<uint32_t> output_dims(2);
6096 output_dims[0] = NumElements(output_tensor.dims) / num_units;
6097 output_dims[1] = num_units;
6098 TF_LITE_ENSURE_STATUS(builder.AddIntermediateOutputTensor(
6099 output_tensor.type, output_dims.size(), output_dims.data(),
6100 output_tensor.params.scale, output_tensor.params.zero_point,
6101 &fc_nn_intermediate_output_index));
6102 } else if (reg->builtin_code == kTfLiteBuiltinMean &&
6103 IsMeanWithDifferentInputOutputQuantization(context, node)) {
6104 // Handle MEAN with different input and output quantization params.
6105 auto& input_tensor = context->tensors[node->inputs->data[0]];
6106 auto& output_tensor = context->tensors[output_index];
6107 TF_LITE_ENSURE_STATUS(builder.AddIntermediateOutputTensor(
6108 output_tensor.type, output_tensor.dims->size,
6109 reinterpret_cast<const uint32_t*>(output_tensor.dims->data),
6110 input_tensor.params.scale, input_tensor.params.zero_point,
6111 &mean_nn_intermediate_output_index, need_int8_conversion));
6112 } else {
6113 TF_LITE_ENSURE_STATUS(
6114 builder.AddTensorOutput(output_index, output_tensor_flags));
6115 }
6116 }
6117
6118     // Dequantize operators may have to be added if inputs need to be
6119     // floating-point.
6120 AddDequantizeOperatorsWhereNeeded(context, reg->builtin_code, node,
6121 node_index, &builder, nnapi_errno);
6122
6123 TF_LITE_ENSURE_OK(context_,
6124 builder.FinalizeAddOperation(nn_op_type, node_index));
6125 if (fc_nn_intermediate_output_index > -1) {
6126 TF_LITE_ENSURE_STATUS(builder.AppendReshape(
6127 fc_nn_intermediate_output_index, node->outputs->data[0], node_index));
6128 }
6129 if (mean_nn_intermediate_output_index > -1) {
6130 TF_LITE_ENSURE_STATUS(builder.AppendRequantize(
6131 mean_nn_intermediate_output_index, node->outputs->data[0], node_index,
6132 output_tensor_flags));
6133 }
6134 }
6135 return kTfLiteOk;
6136 }
6137
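// Builds the NNAPI model for this partition: adds ops and tensors, declares
// model inputs and outputs (including state tensors), optionally relaxes fp32
// computation to fp16, finalizes the model, and allocates the shared memory
// pools used to pass input and output data.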
6138 TfLiteStatus NNAPIDelegateKernel::BuildGraph(
6139 TfLiteContext* context,
6140 const StatefulNnApiDelegate::Options& delegate_options,
6141 const TfLiteIntArray* input_tensors, const TfLiteIntArray* output_tensors,
6142 int* nnapi_errno) {
6143 // Build the ops and tensors.
6144 TF_LITE_ENSURE_STATUS(AddOpsAndTensors(
6145 context, nnapi_errno, delegate_options.allow_dynamic_dimensions));
6146 // Map input and output tensor indices to ANN
6147 std::vector<uint32_t> inputs;
6148 inputs.reserve(input_tensors->size);
6149 std::vector<uint32_t> outputs;
6150 outputs.reserve(output_tensors->size);
6151
6152 size_t total_input_byte_size = 0;
6153   // Map the TensorFlow Lite inputs and outputs to ANN indices.
6154 for (int i : TfLiteIntArrayView(input_tensors)) {
6155 // Constant tensors are not NNAPI inputs.
6156 if (i != kTfLiteOptionalTensor &&
6157 context->tensors[i].allocation_type != kTfLiteMmapRo &&
6158         // The delegate might not have mapped this input (this can
6159         // happen if one tensor is split into several ones).
6160 mapping_util_->TfLiteIndexToNnIndex(mapping_util_.get(), i) != -1) {
6161 inputs.push_back(
6162 mapping_util_->TfLiteIndexToNnIndex(mapping_util_.get(), i));
6163 if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
6164 continue;
6165 }
6166 const TfLiteType nn_type_conversion =
6167 mapping_util_->TfLiteIndexToNnTypeConversion(mapping_util_.get(), i);
6168 int tensor_size = 0;
6169 if (nn_type_conversion == kTfLiteNoType) {
6170 tensor_size =
6171 std::max(context->tensors[i].bytes, tensor_max_size_hints_[i]);
6172 } else {
6173 size_t type_size;
6174 TF_LITE_ENSURE_OK(
6175 context, GetSizeOfType(context, nn_type_conversion, &type_size));
6176 tensor_size = NumElements(&context->tensors[i]) * type_size;
6177 }
6178 total_input_byte_size += tensor_size;
6179 total_input_byte_size += GetNumPaddingBytes(tensor_size);
6180 }
6181 }
6182
6183 size_t total_output_byte_size = 0;
6184 for (int i : TfLiteIntArrayView(output_tensors)) {
6185 const int output_tensor_ann_index =
6186 mapping_util_->TfLiteIndexToNnIndex(mapping_util_.get(), i);
6187 // Unmapped outputs are not added
6188 if (output_tensor_ann_index != -1) {
6189 outputs.push_back(output_tensor_ann_index);
6190 }
6191 if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
6192 continue;
6193 }
6194 size_t tensor_size =
6195 std::max(context->tensors[i].bytes, tensor_max_size_hints_[i]);
6196 total_output_byte_size += tensor_size;
6197 total_output_byte_size += GetNumPaddingBytes(tensor_size);
6198 }
6199
6200 // Add state output tensors as model outputs.
6201 for (int i = 0; i < model_state_outputs_.size(); i++) {
6202 outputs.push_back(model_state_outputs_[i]);
6203 auto tfl_state_idx = model_state_tfl_inputs_[i];
6204 total_output_byte_size += context->tensors[tfl_state_idx].bytes;
6205 total_output_byte_size +=
6206 GetNumPaddingBytes(context->tensors[tfl_state_idx].bytes);
6207 }
6208
6209 // Tell ANN to declare inputs/outputs
6210 RETURN_TFLITE_ERROR_IF_NN_ERROR(
6211 context,
6212 nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs(
6213 nn_model_.get(), inputs.size(), inputs.data(), outputs.size(),
6214 outputs.data()),
6215 "identifying model inputs and outputs", nnapi_errno);
6216
6217 auto allow_fp16 =
6218 context->allow_fp32_relax_to_fp16 | delegate_options.allow_fp16;
6219 if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI11) {
6220 RETURN_TFLITE_ERROR_IF_NN_ERROR(
6221 context,
6222 nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16(
6223 nn_model_.get(), allow_fp16),
6224 "set relaxed computation mode for fp32 if possible", nnapi_errno);
6225 }
6226
6227 RETURN_TFLITE_ERROR_IF_NN_ERROR(
6228 context, nnapi_->ANeuralNetworksModel_finish(nn_model_.get()),
6229 "finalizing the model", nnapi_errno);
6230
6231 // Create shared memory pool for inputs and outputs.
6232 nn_input_memory_ =
6233 std::make_unique<NNMemory>(nnapi_, "input_pool", total_input_byte_size);
6234 nn_output_memory_ =
6235 std::make_unique<NNMemory>(nnapi_, "output_pool", total_output_byte_size);
6236
6237 return kTfLiteOk;
6238 }
6239
6240 void NNAPIDelegateKernel::LogCompilationInfoOnce(
6241 const NnApi* nnapi, const ANeuralNetworksDiagnosticCompilationInfo* info) {
6242 TFLITE_LOG_PROD_ONCE(TFLITE_LOG_INFO,
6243 "NNAPI SL compilation callback called.");
6244
6245 const int32_t session_id =
6246 nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_getSessionId(info);
6247 const int32_t error_code =
6248 nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_getErrorCode(info);
6249 const uint64_t compilation_time_ns =
6250 nnapi
6251 ->SL_ANeuralNetworksDiagnosticCompilationInfo_getCompilationTimeNanos(
6252 info);
6253 const int64_t nnapi_version =
6254 nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_getNnApiVersion(info);
6255 const uint8_t model_arch_hash_first_byte =
6256 *nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_getModelArchHash(
6257 info);
6258 const std::string device_ids_string = std::string(
6259 nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_getDeviceIds(info));
6260 const ANeuralNetworksDiagnosticDataClass input_data_class =
6261 nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_getInputDataClass(
6262 info);
6263 const ANeuralNetworksDiagnosticDataClass output_data_class =
6264 nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_getOutputDataClass(
6265 info);
6266 const bool is_caching_enabled =
6267 nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_isCachingEnabled(info);
6268 const bool is_control_flow_used =
6269 nnapi->SL_ANeuralNetworksDiagnosticCompilationInfo_isControlFlowUsed(
6270 info);
6271
6272 TFLITE_LOG_PROD_ONCE(
6273 TFLITE_LOG_INFO,
6274 "Compilation info: getSessionId=%d getErrorCode=%d "
6275 "getCompilationTimeNanos=%" PRIu64 " getNnApiVersion=%" PRId64
6276 " getDeviceIds=%s getModelArchHash=%x getInputDataClass=%d "
6277 "getOutputDataClass=%d isCachingEnabled=%s isControlFlowUser=%s",
6278 session_id, error_code, compilation_time_ns, nnapi_version,
6279 device_ids_string.c_str(), unsigned{model_arch_hash_first_byte},
6280 input_data_class, output_data_class, is_caching_enabled ? "Y" : "N",
6281 is_control_flow_used ? "Y" : "N");
6282 }
6283
6284 void NNAPIDelegateKernel::LogExecutionInfoOnce(
6285 const NnApi* nnapi, const ANeuralNetworksDiagnosticExecutionInfo* info) {
6286 TFLITE_LOG_PROD_ONCE(TFLITE_LOG_INFO, "NNAPI SL execution callback called.");
6287
6288 const int32_t session_id =
6289 nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getSessionId(info);
6290
6291 const int32_t error_code =
6292 nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getErrorCode(info);
6293
6294 const int64_t nnapi_version =
6295 nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getNnApiVersion(info);
6296
6297 const uint8_t model_arch_hash_first_byte =
6298 *nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getModelArchHash(info);
6299 const std::string device_ids_string = std::string(
6300 nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getDeviceIds(info));
6301 const ANeuralNetworksDiagnosticDataClass input_data_class =
6302 nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getInputDataClass(info);
6303 const ANeuralNetworksDiagnosticDataClass output_data_class =
6304 nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getOutputDataClass(info);
6305 const bool is_caching_enabled =
6306 nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_isCachingEnabled(info);
6307 const bool is_control_flow_used =
6308 nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_isControlFlowUsed(info);
6309 const ANeuralNetworksDiagnosticExecutionMode execution_mode =
6310 nnapi->SL_ANeuralNetworksDiagnosticExecutionInfo_getExecutionMode(info);
6311
6312 const uint64_t runtime_time_ns =
6313 nnapi
6314 ->SL_ANeuralNetworksDiagnosticExecutionInfo_getRuntimeExecutionTimeNanos( // NOLINT line too long
6315 info);
6316
6317 const uint64_t driver_time_ns =
6318 nnapi
6319 ->SL_ANeuralNetworksDiagnosticExecutionInfo_getDriverExecutionTimeNanos( // NOLINT line too long
6320 info);
6321
6322 const uint64_t hardware_time_ns =
6323 nnapi
6324 ->SL_ANeuralNetworksDiagnosticExecutionInfo_getHardwareExecutionTimeNanos( // NOLINT line too long
6325 info);
6326
6327 TFLITE_LOG_PROD_ONCE(
6328 TFLITE_LOG_INFO,
6329 "Execution info: getSessionId=%d getErrorCode=%d "
6330 "getNnApiVersion=%" PRId64
6331 " getModelArchHash=%x getDeviceIds=%s getInputDataClass=%d "
6332 "getOutputDataClass=%d isCachingEnabled=%s isControlFlowUsed=%s "
6333 "getExecutionMode=%d getRuntimeExecutionTimeNanos=%" PRIu64
6334 " getDriverExecutionTimeNanos=%" PRIu64
6335 " getHardwareExecutionTimeNanos=%" PRIu64,
6336 session_id, error_code, nnapi_version,
6337 unsigned{model_arch_hash_first_byte}, device_ids_string.c_str(),
6338 input_data_class, output_data_class, is_caching_enabled ? "Y" : "N",
6339 is_control_flow_used ? "Y" : "N", execution_mode, runtime_time_ns,
6340 driver_time_ns, hardware_time_ns);
6341 }
6342
6343 } // namespace nnapi
6344 } // namespace delegate
6345
6346 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI;
6347 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI11;
6348 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;
6349 using ::tflite::delegate::nnapi::NNAPIDelegateKernel;
6350
6351 StatefulNnApiDelegate::Data::Data(const NnApi* nnapi) : nnapi(nnapi) {}
6352 StatefulNnApiDelegate::Data::Data(std::unique_ptr<const NnApi> nnapi)
6353 : nnapi(nnapi.get()), owned_nnapi(std::move(nnapi)) {}
6354
6355 StatefulNnApiDelegate::Data::~Data() {
6356 std::for_each(std::begin(delegate_state_cache),
6357 std::end(delegate_state_cache),
6358 [](const std::pair<int, NNAPIDelegateKernel*>& entry) {
6359 delete entry.second;
6360 });
6361 }
6362
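// Caches a fully supported delegate kernel, keyed by the first node of the
// partition it was built for, so it can be reused when that partition is
// instantiated.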
6363 void StatefulNnApiDelegate::Data::CacheDelegateKernel(
6364 const TfLiteDelegateParams* delegate_params,
6365 NNAPIDelegateKernel* delegate_state) {
6366 const int cache_key = delegate_params->nodes_to_replace->data[0];
6367 delegate_state_cache.emplace(cache_key, delegate_state);
6368 }
6369
6370 NNAPIDelegateKernel* StatefulNnApiDelegate::Data::MaybeGetCachedDelegateKernel(
6371 const TfLiteDelegateParams* delegate_params) {
6372 const int cache_key = delegate_params->nodes_to_replace->data[0];
6373 const auto cached_state = delegate_state_cache.find(cache_key);
6374 if (cached_state != std::end(delegate_state_cache)) {
6375 auto result = cached_state->second;
6376 delegate_state_cache.erase(cached_state);
6377 return result;
6378 } else {
6379 return nullptr;
6380 }
6381 }
6382
6383 void StatefulNnApiDelegate::StatefulNnApiDelegateConstructorImpl(
6384 const Options& options) {
6385 if (options.accelerator_name) {
6386 delegate_data_.accelerator_name = options.accelerator_name;
6387 }
6388 if (options.cache_dir) {
6389 delegate_data_.cache_dir = options.cache_dir;
6390 }
6391 if (options.model_token) {
6392 delegate_data_.model_token = options.model_token;
6393 }
6394 delegate_data_.execution_preference = options.execution_preference;
6395 delegate_data_.disallow_nnapi_cpu = options.disallow_nnapi_cpu;
6396 delegate_data_.max_number_delegated_partitions =
6397 options.max_number_delegated_partitions;
6398 delegate_data_.allow_fp16 = options.allow_fp16;
6399 delegate_data_.execution_priority = options.execution_priority;
6400 delegate_data_.max_compilation_timeout_duration_ns =
6401 options.max_compilation_timeout_duration_ns;
6402 delegate_data_.max_execution_timeout_duration_ns =
6403 options.max_execution_timeout_duration_ns;
6404 delegate_data_.max_execution_loop_timeout_duration_ns =
6405 options.max_execution_loop_timeout_duration_ns;
6406 if (delegate_data_.nnapi->android_sdk_version >= kMinSdkVersionForNNAPI11) {
6407 delegate_data_.allow_dynamic_dimensions = options.allow_dynamic_dimensions;
6408 }
6409 delegate_data_.use_burst_computation = options.use_burst_computation;
6410 delegate_data_.vendor_compilation_hints = options.vendor_compilation_hints;
6411 delegate_data_.vendor_execution_hints = options.vendor_execution_hints;
6412 delegate_data_.vendor_plugin = options.vendor_plugin;
6413 delegate_data_.max_execution_cache_size = options.max_execution_cache_size;
6414 delegate_data_.tensor_max_size_hints = options.tensor_max_size_hints;
6415
6416 TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
6417 "Created TensorFlow Lite delegate for NNAPI.");
6418 Prepare = DoPrepare;
6419 CopyFromBufferHandle = DoCopyFromBufferHandle;
6420 CopyToBufferHandle = DoCopyToBufferHandle;
6421 FreeBufferHandle = DoFreeBufferHandle;
6422 data_ = &delegate_data_;
6423 if (delegate_data_.allow_dynamic_dimensions) {
6424 flags |= kTfLiteDelegateFlagsAllowDynamicTensors;
6425 // TFLite cannot propagate tensor shapes if custom operators are used.
6426 if (!delegate_data_.vendor_plugin) {
6427 flags |= kTfLiteDelegateFlagsRequirePropagatedShapes;
6428 }
6429 }
6430 }
6431
6432 StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi)
6433 : StatefulNnApiDelegate(nnapi, Options()) {}
6434
6435 StatefulNnApiDelegate::StatefulNnApiDelegate(Options options)
6436 : StatefulNnApiDelegate(NnApiImplementation(), options) {}
6437
6438 StatefulNnApiDelegate::StatefulNnApiDelegate(
6439 const NnApiSLDriverImplFL5* nnapi_support_library_driver, Options options)
6440 : TfLiteDelegate(TfLiteDelegateCreate()),
6441 delegate_data_(
6442 CreateNnApiFromSupportLibrary(nnapi_support_library_driver)) {
6443 StatefulNnApiDelegateConstructorImpl(options);
6444 }
6445
6446 StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi,
6447 Options options)
6448 : TfLiteDelegate(TfLiteDelegateCreate()), delegate_data_(nnapi) {
6449 StatefulNnApiDelegateConstructorImpl(options);
6450 }
6451
6452 StatefulNnApiDelegate::StatefulNnApiDelegate()
6453 : StatefulNnApiDelegate(Options()) {}
6454
6455 const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions(
6456 TfLiteDelegate* delegate) {
6457 auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
6458 StatefulNnApiDelegate::Options options;
6459 options.execution_preference = delegate_data->execution_preference;
6460 options.accelerator_name = delegate_data->accelerator_name.empty()
6461 ? nullptr
6462 : delegate_data->accelerator_name.c_str();
6463 options.cache_dir = delegate_data->cache_dir.empty()
6464 ? nullptr
6465 : delegate_data->cache_dir.c_str();
6466 options.model_token = delegate_data->model_token.empty()
6467 ? nullptr
6468 : delegate_data->model_token.c_str();
6469 options.disallow_nnapi_cpu = delegate_data->disallow_nnapi_cpu;
6470 options.max_number_delegated_partitions =
6471 delegate_data->max_number_delegated_partitions;
6472 options.allow_fp16 = delegate_data->allow_fp16;
6473 options.execution_priority = delegate_data->execution_priority;
6474 options.max_compilation_timeout_duration_ns =
6475 delegate_data->max_compilation_timeout_duration_ns;
6476 options.max_execution_timeout_duration_ns =
6477 delegate_data->max_execution_timeout_duration_ns;
6478 options.max_execution_loop_timeout_duration_ns =
6479 delegate_data->max_execution_loop_timeout_duration_ns;
6480 options.allow_dynamic_dimensions = delegate_data->allow_dynamic_dimensions;
6481 options.use_burst_computation = delegate_data->use_burst_computation;
6482 options.vendor_compilation_hints = delegate_data->vendor_compilation_hints;
6483 options.vendor_execution_hints = delegate_data->vendor_execution_hints;
6484 options.vendor_plugin = delegate_data->vendor_plugin;
6485 options.max_execution_cache_size = delegate_data->max_execution_cache_size;
6486 options.tensor_max_size_hints = delegate_data->tensor_max_size_hints;
6487 return options;
6488 }
6489
6490 const std::vector<StatefulNnApiDelegate::MemoryRegistration>&
6491 StatefulNnApiDelegate::GetTensorMemoryMap(TfLiteDelegate* delegate) {
6492 auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
6493 return delegate_data->tensor_memory_map;
6494 }
6495
6496 delegates::Serialization* StatefulNnApiDelegate::GetCache(
6497 TfLiteDelegate* delegate) {
6498 auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
6499 return delegate_data->cache.get();
6500 }
6501
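// Registers an ANeuralNetworksMemory region that delegated tensors can be
// backed by. Free slots in tensor_memory_map are reused before the map grows;
// the returned TfLiteBufferHandle is the slot index. A possible usage sketch
// (the `interpreter`, `tensor_index`, and `CopyBack` names are caller-supplied
// and not defined here):
//
//   TfLiteBufferHandle handle = delegate.RegisterNnapiMemory(
//       memory, CopyBack, /*callback_context=*/nullptr);
//   interpreter->SetBufferHandle(tensor_index, handle, &delegate);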
6502 TfLiteBufferHandle StatefulNnApiDelegate::RegisterNnapiMemory(
6503 ANeuralNetworksMemory* memory, CopyToHostTensorFnPtr callback,
6504 void* callback_context) {
6505 uint64_t timestamp = delegate_data_.next_buffer_handle_timestamp++;
6506 int map_size = delegate_data_.tensor_memory_map.size();
6507 for (int i = 0; i < map_size; i++) {
6508 if (delegate_data_.tensor_memory_map[i].memory == nullptr) {
6509 delegate_data_.tensor_memory_map[i] = {memory, callback, callback_context,
6510 timestamp};
6511 return i;
6512 }
6513 }
6514 delegate_data_.tensor_memory_map.push_back(
6515 {memory, callback, callback_context, timestamp});
6516 return map_size;
6517 }
6518
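// Copies data out of a registered NNAPI memory region into the given TFLite
// tensor by invoking the callback supplied at registration time.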
6519 TfLiteStatus StatefulNnApiDelegate::DoCopyFromBufferHandle(
6520 TfLiteContext* context, TfLiteDelegate* delegate,
6521 TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) {
6522 auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
6523 if (buffer_handle < 0 ||
6524 buffer_handle >= delegate_data->tensor_memory_map.size()) {
6525 return kTfLiteError;
6526 }
6527 auto memory = delegate_data->tensor_memory_map[buffer_handle].memory;
6528 auto callback = delegate_data->tensor_memory_map[buffer_handle].callback;
6529 auto callback_context =
6530 delegate_data->tensor_memory_map[buffer_handle].callback_context;
6531 if (!memory || !callback) {
6532 return kTfLiteError;
6533 }
6534 return callback(tensor, memory, 0, tensor->bytes, callback_context);
6535 }
6536
6537 TfLiteStatus StatefulNnApiDelegate::DoCopyToBufferHandle(
6538 TfLiteContext* context, TfLiteDelegate* delegate,
6539 TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) {
6540 return kTfLiteError;
6541 }
6542
6543 void StatefulNnApiDelegate::DoFreeBufferHandle(TfLiteContext* context,
6544 TfLiteDelegate* delegate,
6545 TfLiteBufferHandle* handle) {
6546 auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
6547 if (*handle >= 0 && *handle < delegate_data->tensor_memory_map.size()) {
6548 delegate_data->tensor_memory_map[*handle] = {nullptr, nullptr, nullptr};
6549 *handle = kTfLiteNullBufferHandle;
6550 }
6551 }
6552
6553 int StatefulNnApiDelegate::GetNnApiErrno() const {
6554 return delegate_data_.nnapi_errno;
6555 }
6556
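// Runs a trial partitioning and, for each candidate partition, queries the
// target NNAPI devices for the operations they actually support. Kernels for
// fully supported partitions are cached for reuse; if the supported set
// shrank, the partition preview is recomputed.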
6557 // static
6558 TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator(
6559 TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
6560 const std::vector<int>& supported_nodes,
6561 std::vector<int>* device_supported_nodes, int* num_partitions,
6562 TfLiteDelegateParams** params_array, int* nnapi_errno) {
6563 auto* delegate_data = static_cast<Data*>(delegate->data_);
6564 // The first entry in the array is the element count
6565
6566 auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
6567 TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
6568 context, supported_nodes_int_array.get(), params_array, num_partitions));
6569   // For each partition, check which nodes are actually supported by the
6570   // target accelerators.
6571 delegate_data->delegate_state_cache.clear();
6572 for (int idx = 0; idx < *num_partitions; idx++) {
6573 const auto& partition_params = (*params_array)[idx];
6574 std::unique_ptr<NNAPIDelegateKernel> kernel_state(
6575 new NNAPIDelegateKernel(nnapi, delegate_data->vendor_plugin));
6576 TfLiteDelegateParams params_with_delegate = partition_params;
6577 params_with_delegate.delegate = delegate;
6578 TF_LITE_ENSURE_STATUS(
6579         kernel_state->Init(context, &params_with_delegate, nnapi_errno));
6580 std::vector<int> supported_partition_nodes;
6581 TF_LITE_ENSURE_STATUS(
6582 kernel_state->GetOperationsSupportedByTargetNnApiDevices(
6583 context, &supported_partition_nodes, nnapi_errno));
6584 device_supported_nodes->insert(device_supported_nodes->end(),
6585 supported_partition_nodes.begin(),
6586 supported_partition_nodes.end());
6587
6588 bool model_fully_supported = (supported_partition_nodes.size() ==
6589 partition_params.nodes_to_replace->size);
6590 if (model_fully_supported) {
6591 delegate_data->CacheDelegateKernel(&partition_params,
6592 kernel_state.release());
6593 }
6594 }
6595
6596 if (device_supported_nodes->size() != supported_nodes.size()) {
6597     // We changed the set of nodes to delegate, which will create a
6598     // different partitioning layout.
6599 auto device_sup_nodes_int_array =
6600 BuildTfLiteIntArray(*device_supported_nodes);
6601 TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
6602 context, device_sup_nodes_int_array.get(), params_array,
6603 num_partitions));
6604 }
6605
6606 return kTfLiteOk;
6607 }
6608
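// Caps the number of delegated partitions at `max_partitions`, keeping the
// largest partitions (by node count) and dropping the remaining nodes from
// `nodes_to_delegate`.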
6609 // static
6610 TfLiteStatus StatefulNnApiDelegate::LimitDelegatedPartitions(
6611 int max_partitions,
6612 std::vector<TfLiteDelegateParams> partition_params_array,
6613 std::vector<int>* nodes_to_delegate) {
6614 int num_partitions = partition_params_array.size();
6615 if (max_partitions <= 0 || num_partitions <= max_partitions) {
6616 return kTfLiteOk;
6617 }
6618
6619 int number_delegated_partitions = std::count_if(
6620 partition_params_array.begin(), partition_params_array.end(),
6621 [nodes_to_delegate](const TfLiteDelegateParams& partition_params) {
6622 return std::find(nodes_to_delegate->begin(), nodes_to_delegate->end(),
6623 partition_params.nodes_to_replace->data[0]) !=
6624 nodes_to_delegate->end();
6625 });
6626
6627 if (number_delegated_partitions > max_partitions) {
6628 std::sort(partition_params_array.begin(), partition_params_array.end(),
6629 [](const TfLiteDelegateParams& left,
6630 const TfLiteDelegateParams& right) -> bool {
6631 // Reverse sort
6632 return left.nodes_to_replace->size >
6633 right.nodes_to_replace->size;
6634 });
6635
6636 nodes_to_delegate->clear();
6637
6638 for (int i = 0; i < max_partitions; i++) {
6639 const TfLiteDelegateParams& partition_params = partition_params_array[i];
6640
6641 nodes_to_delegate->insert(nodes_to_delegate->end(),
6642 partition_params.nodes_to_replace->data,
6643 partition_params.nodes_to_replace->data +
6644 partition_params.nodes_to_replace->size);
6645 }
6646 }
6647
6648 return kTfLiteOk;
6649 }
6650
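// Collects the nodes that NNAPI can handle when constant fp16 weights are
// remapped to fp32, using FP16GraphPartitionHelper and keeping the nodes of
// the largest supported partitions.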
6651 static std::vector<int> GetSupportedOpsWithFp16WeightRemapping(
6652 TfLiteContext* context, int target_feature_level,
6653 bool is_accelerator_specified, int max_number_delegated_partitions) {
6654 std::vector<int> supported_nodes;
6655 delegates::IsNodeSupportedFn node_supported_fn =
6656 [=](TfLiteContext* context, TfLiteNode* node,
6657 TfLiteRegistration* registration,
6658 std::string* unsupported_details) -> bool {
6659 std::vector<delegate::nnapi::NNAPIValidationFailure> map_failures;
6660 const auto is_supported = NNAPIDelegateKernel::Validate(
6661 context, registration, target_feature_level, node,
6662 is_accelerator_specified, nullptr, &map_failures);
6663 if (!is_supported) {
6664 if (unsupported_details) {
6665 for (auto& failure : map_failures) {
6666 unsupported_details->append(failure.message.c_str());
6667 }
6668 }
6669 return false;
6670 }
6671 return true;
6672 };
6673
6674 delegates::FP16GraphPartitionHelper partition_helper(context,
6675 node_supported_fn);
6676 std::set<std::string> unsupported_nodes_info;
6677 if (partition_helper.Partition(&unsupported_nodes_info) == kTfLiteOk) {
6678 supported_nodes = partition_helper.GetNodesOfFirstNLargestPartitions();
6679 }
6680 return supported_nodes;
6681 }
6682
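// Delegate Prepare callback. Checks that NNAPI is available, resolves the
// target feature level for the selected devices, and computes the set of
// nodes that can be delegated, pruning constant fp16->fp32 DEQUANTIZE ops
// where possible.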
6683 TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
6684 TfLiteDelegate* delegate) {
6685 auto* delegate_data = static_cast<Data*>(delegate->data_);
6686 int* nnapi_errno = &(delegate_data->nnapi_errno);
6687 const NnApi* nnapi = delegate_data->nnapi;
6688
6689   // Reset the error code when the delegate is initialized by TFLite. This
6690   // ensures the error is cleared when the same StatefulNnApiDelegate is
6691   // reused after a failure.
6692 *nnapi_errno = 0;
6693
6694 // Do not check nodes_ if NN API is unavailable.
6695 if (nnapi->android_sdk_version < kMinSdkVersionForNNAPI ||
6696 !nnapi->nnapi_exists) {
6697 return kTfLiteOk;
6698 }
6699
  int target_feature_level = nnapi->android_sdk_version;
  const StatefulNnApiDelegate::Options delegate_options =
      StatefulNnApiDelegate::GetOptions(delegate);
  // For NNAPI 1.2+, check whether any accelerator is available. If not, do not
  // delegate to NNAPI's CPU reference implementation unless it has been
  // explicitly specified as the target accelerator.
  if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
    if (ShouldUseTargetDevices(delegate_options, nnapi)) {
      std::vector<ANeuralNetworksDevice*> devices;
      TF_LITE_ENSURE_STATUS(
          GetTargetDevices(context, delegate, nnapi, nnapi_errno, &devices));

      if (devices.empty()) {
        if (delegate_options.accelerator_name) {
          // A specific device was selected but it is not available.
          return kTfLiteError;
        } else {
          // Only nnapi-reference is available but it was disabled by the
          // delegate options.
          return kTfLiteOk;
        }
      }

      TF_LITE_ENSURE_STATUS(GetTargetFeatureLevel(
          context, nnapi, devices, &target_feature_level, nnapi_errno));
    } else {
      // If no accelerator is specified, only use NNAPI if an accelerator is
      // available. Any available accelerator will make the device count larger
      // than 1. More sophisticated checks and allowlisting can be added later.
      uint32_t device_count = 0;
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context, nnapi->ANeuralNetworks_getDeviceCount(&device_count),
          "getting number of NNAPI devices", nnapi_errno);
      if (device_count <= 1) {
        return kTfLiteOk;
      }
    }
  }

  std::vector<int> supported_nodes;
  // We don't care about all nodes_, only about the ones in the current
  // execution plan.
  TfLiteIntArray* execution_plan;
  TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &execution_plan));
  // Copy the execution plan and wrap it with unique_ptr.
  std::unique_ptr<TfLiteIntArray, decltype(&TfLiteIntArrayFree)> plan(
      TfLiteIntArrayCopy(execution_plan), TfLiteIntArrayFree);

  // Check whether each node is supported.
  const bool is_accelerator_specified = ShouldUseTargetDevices(
      delegate_options, nnapi, /*exclude_nnapi_reference=*/true);
  std::vector<delegate::nnapi::NNAPIValidationFailure> map_failures;
  // First pass through the execution plan to remember the mapping of
  // FP16->FP32 dequantizations in the graph.
  std::vector<int> fp16_to_fp32(context->tensors_size, -1);
  bool should_prune_fp16_dequantize = false;
  for (int i = 0; i < plan->size; ++i) {
    const int node_id = plan->data[i];
    TfLiteNode* node = nullptr;
    TfLiteRegistration* registration = nullptr;
    TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
        context, node_id, &node, &registration));
    if (IsDequantizeConstFloat16(context, node, registration)) {
      should_prune_fp16_dequantize = true;
      fp16_to_fp32[node->inputs->data[0]] = node->outputs->data[0];
    }
  }
  if (should_prune_fp16_dequantize) {
    supported_nodes = GetSupportedOpsWithFp16WeightRemapping(
        context, target_feature_level, is_accelerator_specified,
        delegate_options.max_number_delegated_partitions);
  } else {
    for (int node_index : TfLiteIntArrayView(plan.get())) {
      TfLiteNode* node;
      TfLiteRegistration* registration;
      TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
          context, node_index, &node, &registration));
      if (NNAPIDelegateKernel::Validate(
              context, registration, target_feature_level, node,
              is_accelerator_specified, delegate_options.vendor_plugin,
              &map_failures)) {
        supported_nodes.push_back(node_index);
      }
#ifdef NNAPI_VERBOSE_VALIDATION
      for (auto& failure : map_failures) {
        TFLITE_LOG_PROD(
            TFLITE_LOG_WARNING,
            "Operator %s (v%d) refused by NNAPI delegate: %s",
            tflite::EnumNameBuiltinOperator(
                static_cast<BuiltinOperator>(registration->builtin_code)),
            registration->version, failure.message.c_str());
      }
      map_failures.clear();
#endif
    }
  }

  // If there are no delegated nodes, short-circuit node replacement.
  if (supported_nodes.empty()) {
    return kTfLiteOk;
  }

  // NN API Delegate Registration (the pseudo kernel that will invoke NN
  // API node subsets).
  static const TfLiteRegistration nnapi_delegate_kernel = {
      .init = [](TfLiteContext* context, const char* buffer,
                 size_t length) -> void* {
        const TfLiteDelegateParams* params =
            reinterpret_cast<const TfLiteDelegateParams*>(buffer);

        auto* delegate_data = static_cast<Data*>(params->delegate->data_);
        int* nnapi_errno = &(delegate_data->nnapi_errno);

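        // Reuse a kernel previously cached for these delegate params, if any;
        // otherwise create and initialize a new one.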
        NNAPIDelegateKernel* kernel_state =
            delegate_data->MaybeGetCachedDelegateKernel(params);
        if (!kernel_state) {
          kernel_state = new NNAPIDelegateKernel(delegate_data->nnapi,
                                                 delegate_data->vendor_plugin);
          kernel_state->Init(context, params, nnapi_errno);
        }

        return kernel_state;
      },

      .free = [](TfLiteContext* context, void* buffer) -> void {
        delete reinterpret_cast<NNAPIDelegateKernel*>(buffer);
      },

      .prepare = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
        NNAPIDelegateKernel* state =
            reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
        int* nnapi_errno =
            &(static_cast<Data*>(node->delegate->data_)->nnapi_errno);
        return state->Prepare(context, node, nnapi_errno);
      },

      .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
        NNAPIDelegateKernel* state =
            reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
        int* nnapi_errno =
            &(static_cast<Data*>(node->delegate->data_)->nnapi_errno);
        return state->Invoke(context, node, nnapi_errno);
      },

      .profiling_string = nullptr,
      .builtin_code = kTfLiteBuiltinDelegate,
      .custom_name = "TfLiteNnapiDelegate",
      .version = 1,
  };

  // Initialize caching, if applicable, from Options.
  const char* cache_dir = delegate_options.cache_dir;
  const char* model_token = delegate_options.model_token;
  delegates::SerializationParams params = {model_token, cache_dir};
  if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12 && cache_dir &&
      model_token) {
    delegate_data->cache = std::make_unique<delegates::Serialization>(params);
  }

  delegates::Serialization* cache_ptr = delegate_data->cache.get();

  if (cache_ptr) {
    // Reuse cached delegation decision if possible.
    std::string accelerator_id = NnApiBackendId(delegate_options);
    TfLiteIntArray* cached_nodes_to_delegate = nullptr;
    if (delegates::GetDelegatedNodes(context, cache_ptr, accelerator_id,
                                     &cached_nodes_to_delegate) == kTfLiteOk) {
      if (cached_nodes_to_delegate->size == 0) return kTfLiteOk;
      auto status = context->ReplaceNodeSubsetsWithDelegateKernels(
          context, nnapi_delegate_kernel, cached_nodes_to_delegate, delegate);
      TfLiteIntArrayFree(cached_nodes_to_delegate);
      return status;
    }
  }

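  // No cached decision was available: compute the nodes to delegate, either by
  // asking the target accelerator which operations it supports (NNAPI 1.2+) or
  // by previewing TFLite's own partitioning of the supported nodes.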
  std::vector<int> nodes_to_delegate;

  int num_partitions;
  TfLiteDelegateParams* params_array;
  if (is_accelerator_specified &&
      nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
    // Filter out nodes not supported by the target accelerators. Supported
    // operations cannot be queried before NNAPI 1.2.
    TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
        context, delegate, nnapi, supported_nodes, &nodes_to_delegate,
        &num_partitions, &params_array, nnapi_errno));
  } else {
    nodes_to_delegate = supported_nodes;
    auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
    TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
        context, supported_nodes_int_array.get(), &params_array,
        &num_partitions));
  }

  // FP16GraphPartitionHelper alters the original graph by remapping fp32
  // dequantize outputs to their fp16 inputs. If the accelerator backend does
  // not support all nodes of the fp16 model, the original graph must be
  // restored for things to work.
  if (should_prune_fp16_dequantize &&
      supported_nodes.size() != nodes_to_delegate.size()) {
    // Restore the original graph.
    for (int execution_plan_index = 0; execution_plan_index < plan->size;
         ++execution_plan_index) {
      int node_index = plan->data[execution_plan_index];
      TfLiteNode* node = nullptr;
      TfLiteRegistration* reg = nullptr;
      TF_LITE_ENSURE_STATUS(
          context->GetNodeAndRegistration(context, node_index, &node, &reg));
      if (reg->builtin_code == kTfLiteBuiltinDequantize) continue;

      for (int i = 0; i < node->inputs->size; ++i) {
        const int original_input_idx = node->inputs->data[i];
        if (original_input_idx == kTfLiteOptionalTensor) continue;
        // Use the original FP32 input.
        if (context->tensors[original_input_idx].type == kTfLiteFloat16 &&
            fp16_to_fp32[original_input_idx] != -1) {
          node->inputs->data[i] = fp16_to_fp32[original_input_idx];
        }
      }
    }
    // Only allow full-model delegation for fp16 models.
    return kTfLiteOk;
  }

  TF_LITE_ENSURE_STATUS(
      LimitDelegatedPartitions(delegate_options.max_number_delegated_partitions,
                               std::vector<TfLiteDelegateParams>(
                                   params_array, params_array + num_partitions),
                               &nodes_to_delegate));

  auto nodes_to_delegate_int_array = BuildTfLiteIntArray(nodes_to_delegate);

  if (cache_ptr) {
    // Cache the list of nodes to be delegated for later runs.
    std::string accelerator_id = NnApiBackendId(delegate_options);
    if (delegates::SaveDelegatedNodes(context, cache_ptr, accelerator_id,
                                      nodes_to_delegate_int_array.get()) !=
        kTfLiteOk) {
      // Not a critical error.
      TF_LITE_KERNEL_LOG(context, "Could not save delegated nodes");
    }
  }

  if (nodes_to_delegate_int_array->size == 0) {
    return kTfLiteOk;
  } else {
    // Request TFLite to partition the graph and create a new
    // nnapi_delegate_kernel for each independent node subset.
    return context->ReplaceNodeSubsetsWithDelegateKernels(
        context, nnapi_delegate_kernel, nodes_to_delegate_int_array.get(),
        delegate);
  }
}

// Returns a singleton NNAPI Delegate that can check for support of ops.
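//
// Example usage (a minimal sketch; assumes an already-built
// tflite::Interpreter named `interpreter`):
//
//   TfLiteDelegate* delegate = tflite::NnApiDelegate();
//   if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
//     // Handle the error, e.g. fall back to CPU execution. The returned
//     // delegate is a process-wide singleton and must not be deleted.
//   }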
TfLiteDelegate* NnApiDelegate() {
  static StatefulNnApiDelegate* delegate = new StatefulNnApiDelegate();
  return delegate;
}

}  // namespace tflite