/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_OPTIONS_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_OPTIONS_H_

#include <stdint.h>

#include "tensorflow/lite/c/common.h"

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

// Encapsulated compilation/runtime tradeoffs.
enum TfLiteGpuInferenceUsage {
  // The delegate will be used only once, so bootstrap/init time should be
  // taken into account.
  TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER = 0,

  // Prefer maximizing throughput. The same delegate will be used repeatedly
  // on multiple inputs.
  TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED = 1,
};

enum TfLiteGpuInferencePriority {
  // AUTO priority is used when a single priority is the dominant factor. For
  // example, priority1 = MIN_LATENCY with the remaining priorities left as
  // AUTO results in the configuration that achieves maximum performance.
  TFLITE_GPU_INFERENCE_PRIORITY_AUTO = 0,
  TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION = 1,
  TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY = 2,
  TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE = 3,
};

// Used to toggle experimental flags used in the delegate. Note that this is a
// bitmask, so the values should be 1, 2, 4, 8, ...etc.
enum TfLiteGpuExperimentalFlags {
  TFLITE_GPU_EXPERIMENTAL_FLAGS_NONE = 0,
  // Enables inference on quantized models with the delegate.
  // NOTE: This is enabled in TfLiteGpuDelegateOptionsV2Default.
  TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_QUANT = 1 << 0,
  // Enforces execution with the specified backend.
  TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY = 1 << 1,
  TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY = 1 << 2,
  // Enables serialization of GPU kernels & model data. Speeds up
  // initialization at the cost of space on disk.
  // The delegate performs serialization the first time it is applied with a
  // new model or new inference params. Later initializations are fast.
  // ModifyGraphWithDelegate will fail if the data cannot be serialized.
  //
  // NOTE: The user also needs to set serialization_dir & model_token in
  // TfLiteGpuDelegateOptionsV2.
  // Currently works only if the CL backend is used.
  TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION = 1 << 3,
};

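// For instance, to force the OpenCL backend and also enable serialization,
// the flags can be combined with a bitwise OR (a minimal sketch):
//
//   int64_t flags = TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY |
//                   TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION;
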
// IMPORTANT: Always use the TfLiteGpuDelegateOptionsV2Default() function to
// create a new instance of TfLiteGpuDelegateOptionsV2; otherwise, every newly
// added option may break inference.
typedef struct {
  // When set to zero, computations are carried out in the maximal possible
  // precision. Otherwise, the GPU may quantize tensors, downcast values, or
  // process in FP16 to increase performance. For most models the precision
  // loss is acceptable.
  // [OBSOLETE]: to be removed
  int32_t is_precision_loss_allowed;

  // Preference is defined in TfLiteGpuInferenceUsage.
  int32_t inference_preference;

  // Ordered priorities provide better control over the desired semantics,
  // where priority(n) is more important than priority(n+1). Therefore, each
  // time the inference engine needs to make a decision, it uses the ordered
  // priorities to do so.
  // For example:
  //   MAX_PRECISION at priority1 would not allow the precision to be
  //   decreased, but moving it to priority2 or priority3 would allow F16
  //   calculation.
  //
  // Priority is defined in TfLiteGpuInferencePriority.
  // AUTO priority can only be used when the higher priorities are fully
  // specified.
  // For example:
  //   VALID:   priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO
  //   VALID:   priority1 = MIN_LATENCY, priority2 = MAX_PRECISION,
  //            priority3 = AUTO
  //   INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO
  //   INVALID: priority1 = MIN_LATENCY, priority2 = AUTO,
  //            priority3 = MAX_PRECISION
  // Invalid priorities will result in an error. A concrete sketch follows
  // the three priority fields below.
  int32_t inference_priority1;
  int32_t inference_priority2;
  int32_t inference_priority3;

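  // For example, to prefer low latency while still allowing reduced
  // precision, the fields can be set as follows (a minimal sketch; assumes
  // `options` was created via TfLiteGpuDelegateOptionsV2Default()):
  //
  //   options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
  //   options.inference_priority2 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO;
  //   options.inference_priority3 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO;
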
  // Bitmask flags. See the comments in TfLiteGpuExperimentalFlags.
  int64_t experimental_flags;

  // A graph could have multiple partitions that can be delegated to the GPU.
  // This limits the maximum number of partitions to be delegated. By default,
  // it's set to 1 in TfLiteGpuDelegateOptionsV2Default().
  int32_t max_delegated_partitions;

  // The nul-terminated directory to use for serialization.
  // Whether serialization actually happens depends on the backend used and
  // the validity of this directory.
  // Set to nullptr in TfLiteGpuDelegateOptionsV2Default(), which implies that
  // the delegate will not try serialization.
  //
  // NOTE: Users should ensure that this directory is private to the app to
  // avoid data access issues.
  const char* serialization_dir;

  // The unique nul-terminated token string that acts as a 'namespace' for
  // all serialization entries.
  // Should be unique to a particular model (graph & constants).
  // For an example of how to generate this from a TFLite model, see
  // StrFingerprint() in lite/delegates/serialization.h.
  //
  // Set to nullptr in TfLiteGpuDelegateOptionsV2Default(), which implies that
  // the delegate will not try serialization.
  const char* model_token;
} TfLiteGpuDelegateOptionsV2;

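// For example, to turn on serialization (a minimal sketch; the cache
// directory and token below are hypothetical placeholders):
//
//   TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
//   options.experimental_flags |=
//       TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION;
//   options.serialization_dir = "/data/data/com.example.app/cache";
//   options.model_token = "my_model_v1_token";
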
// Populates TfLiteGpuDelegateOptionsV2 as follows:
//   is_precision_loss_allowed = false
//   inference_preference = TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER
//   priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION
//   priority2 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO
//   priority3 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO
//   experimental_flags = TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_QUANT
//   max_delegated_partitions = 1
TFL_CAPI_EXPORT TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default();

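// Example usage (a minimal sketch; TfLiteGpuDelegateV2Create() and
// TfLiteGpuDelegateV2Delete() are declared in the companion delegate.h, and
// `interpreter` stands for an already-built tflite::Interpreter):
//
//   TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
//   options.inference_preference =
//       TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
//   TfLiteDelegate* delegate = TfLiteGpuDelegateV2Create(&options);
//   if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
//     // Handle the error, or fall back to CPU execution.
//   }
//   ...
//   TfLiteGpuDelegateV2Delete(delegate);
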
#ifdef __cplusplus
}
#endif  // __cplusplus

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_OPTIONS_H_