/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_TF2TENSORRT_TRT_CONVERT_H_
#define TENSORFLOW_COMPILER_TF2TENSORRT_TRT_CONVERT_H_

#include <climits>
#include <string>
#include <vector>

#if GOOGLE_CUDA && GOOGLE_TENSORRT

#include "tensorflow/compiler/tf2tensorrt/common/utils.h"
#include "tensorflow/compiler/tf2tensorrt/convert/trt_parameters.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/platform/statusor.h"
#include "tensorflow/core/protobuf/meta_graph.pb.h"

namespace tensorflow {

struct SavedModelBundle;

namespace tensorrt {

struct TfTrtConversionParams {
  // Corresponds to the 'workspaceSize' parameter of
  // nvinfer1::IBuilderConfig::setMaxWorkspaceSize.
#if IS_TRT_VERSION_GE(8, 4, 0, 0)
  // Must use `LLONG_MAX - 512` to avoid overflow during casting.
  size_t max_workspace_size_bytes = LLONG_MAX - 512;
#else
  size_t max_workspace_size_bytes = 1 << 30;  // 1,073,741,824
#endif

  // Minimum precision used by the TRT engine.
  TrtPrecisionMode precision_mode = TrtPrecisionMode::FP32;

  // The minimum number of nodes required for a subgraph to be replaced by
  // TRTEngineOp. Note that many small TRT subgraphs can be detrimental to
  // performance; increasing the minimum segment size can help avoid the
  // problem.
  int minimum_segment_size = 3;

  // Max number of cached TRT engines for dynamic TRT ops (TRT ops are
  // dynamic by default).
  int max_cached_engines = 1;

  // Note that calibration is currently not implemented with the C++
  // converter. This argument is ignored if precision_mode is not INT8. If set
  // to true, the implementation will use the user-provided inputs to generate
  // calibration data. If set to false, quantization nodes will be expected
  // for every tensor in the graph (excluding those which will be fused). If a
  // range is missing, an error will occur. Please note that accuracy may be
  // negatively affected if there is a mismatch between which tensors TRT
  // quantizes and which tensors were trained with fake quantization.
  bool use_calibration = true;

  // Whether to enable dynamic shape mode for the TRT engines. It is
  // recommended to use dynamic shape mode to handle dynamic input shapes.
  // Enabling dynamic shape mode can also improve the conversion rate of
  // graphs with static input shapes.
  bool use_dynamic_shape = true;

  // In dynamic shape mode we create an engine that can handle various input
  // shape ranges. We derive the shape optimization profiles for the TRT
  // engines in the graph based on user-provided input data and
  // profile_strategy.
  ProfileStrategy profile_strategy = ProfileStrategy::kRange;

  // Whether to allow building TRT engines at runtime. If no TensorRT engine
  // that can handle the given inputs is found in the cache at runtime, then a
  // new TensorRT engine is built at runtime if allow_build_at_runtime=true;
  // otherwise native TF is used. We recommend setting this value to false and
  // building the engine in advance, to avoid runtime overhead.
  bool allow_build_at_runtime = true;

  // Record the TRT engine as an attribute of the TRTEngineOp. This is only
  // valid when max_cached_engines == 1. Note: the frozen graph together with
  // the serialized engines has to be below 2GiB (the protobuf size limit). If
  // convert_to_static_engine = false, then the converted graph_def only
  // contains placeholder TRTEngineOp nodes.
  bool convert_to_static_engine = true;
};
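// Example (a minimal illustrative sketch, not a recommended configuration):
// converting a frozen graph with FP16 precision and the engines embedded
// statically in the output GraphDef. `frozen_graph_def`, `input_names`,
// `output_names`, and `inputs` are assumed to be provided by the caller.
//
//   TfTrtConversionParams params;
//   params.precision_mode = TrtPrecisionMode::FP16;
//   params.minimum_segment_size = 5;         // avoid many tiny TRT subgraphs
//   params.allow_build_at_runtime = false;   // build engines ahead of time
//   params.convert_to_static_engine = true;  // needs max_cached_engines == 1
//   StatusOr<GraphDef> converted = ConvertAndBuild(
//       frozen_graph_def, input_names, output_names, inputs, params);
//
// ConvertAndBuild is declared below.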
/**
 * Converts the graph with TF-TRT.
 *
 * Performs TF-TRT conversion and returns the converted GraphDef. If inputs
 * is not empty and convert_to_static_engine is requested, we also build the
 * engines and convert the engines to static engines.
 *
 * Arguments:
 * - frozen_graph_def: the input graph; it is assumed to be frozen
 * - input_names: names of the input tensors
 * - output_names: names of the output tensors
 * - inputs: tensors that we will use as input while building the TRT engines
 * - conv_params: parameters for the TF-TRT conversion
 *
 * Returns the converted graph_def.
 */
StatusOr<GraphDef> ConvertAndBuild(
    const GraphDef& frozen_graph_def, const std::vector<string>& input_names,
    const std::vector<string>& output_names,
    const std::vector<std::vector<tensorflow::Tensor>>& inputs,
    const TfTrtConversionParams& conv_params);

// Overload of the above that converts the graph of the given signature of a
// SavedModelBundle.
StatusOr<GraphDef> ConvertAndBuild(
    SavedModelBundle* bundle,
    const std::string& signature_key = "serving_default",
    const std::vector<std::vector<tensorflow::Tensor>>& inputs = {},
    const TfTrtConversionParams& conversion_params = TfTrtConversionParams());

}  // namespace tensorrt
}  // namespace tensorflow

#endif  // GOOGLE_CUDA && GOOGLE_TENSORRT

#endif  // TENSORFLOW_COMPILER_TF2TENSORRT_TRT_CONVERT_H_