/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_TF2TENSORRT_TRT_PARAMETERS_H_
#define TENSORFLOW_COMPILER_TF2TENSORRT_TRT_PARAMETERS_H_

// Everything below is only declared when both CUDA and TensorRT support are
// compiled in; otherwise this header expands to nothing.
#if GOOGLE_CUDA && GOOGLE_TENSORRT

#include "tensorflow/core/platform/status.h"

namespace tensorflow {
namespace tensorrt {

// The PrecisionMode controls the precision used in TRT converted parts of the
// model. Setting PrecisionMode other than FP32 enables TensorRT to select
// lower-precision implementations when searching for the fastest kernels.
//
// For regularized models whose input dynamic range is approximately one, this
// typically produces significant speedups with negligible change in accuracy.
// There is additional complexity when working with INT8, see Calibration.
//
// - FP32  No lower-precision layer selection is enabled.
// - FP16  Enable FP16 layer selection, with FP32 fallback.
// - INT8  Enable Int8 layer selection, with FP32 and FP16 fallback.
//
// Note that TensorRT will still choose a higher-precision kernel if it results
// in overall lower runtime, or if no low-precision implementation exists.
enum class TrtPrecisionMode {
  FP32,
  FP16,
  INT8,
};

// Conversions between `TrtPrecisionMode` values and their textual names.
//
// The result is presumably written through the trailing output pointer
// (`name` / `mode`), with success or failure signalled by the returned Status.
// NOTE(review): the exact name strings, and the error produced for an
// unrecognized input, are defined in the implementation file -- confirm there
// before relying on them.
//
// By-value parameters are declared without a top-level `const`: it has no
// effect on the function type in a declaration, so definitions remain free to
// add it.
Status TrtPrecisionModeToName(TrtPrecisionMode mode, string* name);
Status TrtPrecisionModeFromName(const string& name, TrtPrecisionMode* mode);

// Returns a string describing `mode`. Judging by its name this is intended for
// logging/debug output; the exact format is defined by the implementation.
string DebugString(TrtPrecisionMode mode);

// Optimization profile generation strategies.
// - `kRange`: create one profile that works for inputs with dimension values
//   in the range of [min_dims, max_dims] where min_dims and max_dims are
//   derived from the provided inputs.
// - `kOptimal`: create one profile for each input. The profile only works for
//   inputs with the same dimensions as the input it is created for. The GPU
//   engine will be run with optimal performance with such inputs.
// - `kRangeOptimal`: create the profiles for both `kRange` and `kOptimal`.
// - `kImplicitBatchModeCompatible`: create the profiles that will produce the
//   same GPU engines as the implicit_batch_mode would produce.
enum class ProfileStrategy {
  kRange,
  kOptimal,
  kRangeOptimal,
  kImplicitBatchModeCompatible,
};

// Conversions between `ProfileStrategy` values and their textual names.
//
// `ProfileStrategyToName` returns the name directly, while
// `ProfileStrategyFromName` returns a Status and presumably stores the parsed
// value in `*strategy`.
// NOTE(review): the accepted spellings (e.g. case sensitivity) and the error
// for an unknown name are defined in the implementation file -- confirm there.
string ProfileStrategyToName(ProfileStrategy strategy);
Status ProfileStrategyFromName(const string& name, ProfileStrategy* strategy);

}  // namespace tensorrt
}  // namespace tensorflow

#endif  // GOOGLE_CUDA && GOOGLE_TENSORRT
#endif  // TENSORFLOW_COMPILER_TF2TENSORRT_TRT_PARAMETERS_H_