/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_OPTIONS_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_OPTIONS_H_

#include <stdint.h>

#include "tensorflow/lite/c/common.h"

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

// Encapsulates compilation/runtime tradeoffs.
enum TfLiteGpuInferenceUsage {
  // The delegate will be used only once; therefore, bootstrap/init time
  // should be taken into account.
  TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER = 0,

  // Prefer maximizing throughput. The same delegate will be used repeatedly
  // on multiple inputs.
  TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED = 1,
};

enum TfLiteGpuInferencePriority {
  // AUTO priority is needed when a single priority is the most important
  // factor. For example,
  // priority1 = MIN_LATENCY would result in the configuration that achieves
  // maximum performance.
  TFLITE_GPU_INFERENCE_PRIORITY_AUTO = 0,
  TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION = 1,
  TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY = 2,
  TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE = 3,
};

// Used to toggle experimental flags used in the delegate. Note that this is a
// bitmask, so the values should be 1, 2, 4, 8, ...etc.
enum TfLiteGpuExperimentalFlags {
  TFLITE_GPU_EXPERIMENTAL_FLAGS_NONE = 0,
  // Enables inference on quantized models with the delegate.
  // NOTE: This is enabled in TfLiteGpuDelegateOptionsV2Default.
  TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_QUANT = 1 << 0,
  // Enforces execution with the provided backend.
  TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY = 1 << 1,
  TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY = 1 << 2,
  // Enables serialization of GPU kernels & model data. Speeds up
  // initialization at the cost of space on disk.
  // The delegate performs serialization the first time it is applied with a
  // new model or inference params. Later initializations are fast.
  // ModifyGraphWithDelegate will fail if data cannot be serialized.
  //
  // NOTE: The user also needs to set serialization_dir & model_token in
  // TfLiteGpuDelegateOptionsV2.
  // Currently works only if the CL backend is used.
  TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION = 1 << 3,
};
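// As an illustrative sketch: since the values above form a bitmask, flags are
// combined with bitwise OR. For instance, forcing the CL backend while
// enabling serialization (a combination consistent with the note above that
// serialization currently works only with CL) could look like:
//
//   int64_t flags = TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY |
//                   TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION;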
// IMPORTANT: Always use the TfLiteGpuDelegateOptionsV2Default() method to
// create a new instance of TfLiteGpuDelegateOptionsV2; otherwise, every newly
// added option may break inference.
typedef struct {
  // When set to zero, computations are carried out in maximal possible
  // precision. Otherwise, the GPU may quantize tensors, downcast values, or
  // process in FP16 to increase performance. For most models the precision
  // loss is acceptable.
  // [OBSOLETE]: to be removed
  int32_t is_precision_loss_allowed;

  // Preference is defined in TfLiteGpuInferenceUsage.
  int32_t inference_preference;

  // Ordered priorities provide better control over desired semantics, where
  // priority(n) is more important than priority(n+1). Therefore, each time
  // the inference engine needs to make a decision, it uses the ordered
  // priorities to do so.
  // For example:
  //   MAX_PRECISION at priority1 would not allow the engine to decrease
  //   precision, but moving it to priority2 or priority3 would allow FP16
  //   calculation.
  //
  // Priority is defined in TfLiteGpuInferencePriority.
  // AUTO priority can only be used when higher priorities are fully
  // specified.
  // For example:
  //   VALID:   priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO
  //   VALID:   priority1 = MIN_LATENCY, priority2 = MAX_PRECISION,
  //            priority3 = AUTO
  //   INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO
  //   INVALID: priority1 = MIN_LATENCY, priority2 = AUTO,
  //            priority3 = MAX_PRECISION
  // Invalid priorities result in an error.
  int32_t inference_priority1;
  int32_t inference_priority2;
  int32_t inference_priority3;

  // Bitmask flags. See the comments in TfLiteGpuExperimentalFlags.
  int64_t experimental_flags;

  // A graph could have multiple partitions that can be delegated to the GPU.
  // This limits the maximum number of partitions to be delegated. By default,
  // it's set to 1 in TfLiteGpuDelegateOptionsV2Default().
  int32_t max_delegated_partitions;

  // The nul-terminated directory to use for serialization.
  // Whether serialization actually happens depends on the backend used and
  // the validity of this directory.
  // Set to nullptr in TfLiteGpuDelegateOptionsV2Default(), which implies the
  // delegate will not try serialization.
  //
  // NOTE: Users should ensure that this directory is private to the app to
  // avoid data access issues.
  const char* serialization_dir;

  // The unique nul-terminated token string that acts as a 'namespace' for
  // all serialization entries.
  // Should be unique to a particular model (graph & constants).
  // For an example of how to generate this from a TFLite model, see
  // StrFingerprint() in lite/delegates/serialization.h.
  //
  // Set to nullptr in TfLiteGpuDelegateOptionsV2Default(), which implies the
  // delegate will not try serialization.
  const char* model_token;
} TfLiteGpuDelegateOptionsV2;

// Populates TfLiteGpuDelegateOptionsV2 as follows:
//   is_precision_loss_allowed = false
//   inference_preference = TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER
//   priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION
//   priority2 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO
//   priority3 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO
//   experimental_flags = TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_QUANT
//   max_delegated_partitions = 1
TFL_CAPI_EXPORT TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default();

#ifdef __cplusplus
}
#endif  // __cplusplus

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_OPTIONS_H_
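// Example usage (an illustrative sketch; TfLiteGpuDelegateV2Create() and
// TfLiteGpuDelegateV2Delete() are declared separately in
// tensorflow/lite/delegates/gpu/delegate.h, and the interpreter wiring is
// assumed to happen elsewhere):
//
//   // Start from the defaults, then override only what is needed.
//   TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
//   options.inference_preference =
//       TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
//   TfLiteDelegate* delegate = TfLiteGpuDelegateV2Create(&options);
//   // ... pass `delegate` to ModifyGraphWithDelegate() and run inference ...
//   TfLiteGpuDelegateV2Delete(delegate);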