/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_TF2TENSORRT_TRT_PARAMETERS_H_
#define TENSORFLOW_COMPILER_TF2TENSORRT_TRT_PARAMETERS_H_

#if GOOGLE_CUDA && GOOGLE_TENSORRT

#include "tensorflow/core/platform/status.h"

namespace tensorflow {
namespace tensorrt {
// The PrecisionMode controls the precision used in the TRT-converted parts of
// the model. Setting a PrecisionMode other than FP32 enables TensorRT to
// select lower-precision implementations when searching for the fastest
// kernels.
//
// For regularized models whose input dynamic range is approximately one, this
// typically produces significant speedups with negligible change in accuracy.
// There is additional complexity when working with INT8; see Calibration.
//
// - FP32: Use FP32 kernels only.
// - FP16: Enable FP16 layer selection, with FP32 fallback.
// - INT8: Enable INT8 layer selection, with FP32 and FP16 fallback.
//
// Note that TensorRT will still choose a higher-precision kernel if it results
// in overall lower runtime, or if no low-precision implementation exists.
enum class TrtPrecisionMode { FP32, FP16, INT8 };

Status TrtPrecisionModeToName(const TrtPrecisionMode mode, string* name);

Status TrtPrecisionModeFromName(const string& name, TrtPrecisionMode* mode);

string DebugString(const TrtPrecisionMode mode);
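
// Example usage of the helpers above (a minimal sketch, not part of this
// header). It assumes "FP16" is among the spellings accepted by
// TrtPrecisionModeFromName and uses TF's logging macros for illustration:
//
//   TrtPrecisionMode mode;
//   Status status = TrtPrecisionModeFromName("FP16", &mode);
//   if (!status.ok()) {
//     LOG(WARNING) << "Unrecognized precision mode, falling back to FP32: "
//                  << status;
//     mode = TrtPrecisionMode::FP32;
//   }
//   string name;
//   TF_CHECK_OK(TrtPrecisionModeToName(mode, &name));
//   VLOG(1) << "Using precision mode " << DebugString(mode);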

// Optimization profile generation strategies.
// - `kRange`: create one profile that works for inputs with dimension values
//   in the range [min_dims, max_dims], where min_dims and max_dims are
//   derived from the provided inputs.
// - `kOptimal`: create one profile for each input. Each profile only works
//   for inputs with the same dimensions as the input it was created for; the
//   engine achieves optimal performance for such inputs.
// - `kRangeOptimal`: create the profiles for both `Range` and `Optimal`.
// - `kImplicitBatchModeCompatible`: create the profiles that will produce the
//   same GPU engines as implicit batch mode would produce.
enum class ProfileStrategy {
  kRange,
  kOptimal,
  kRangeOptimal,
  kImplicitBatchModeCompatible,
};

string ProfileStrategyToName(const ProfileStrategy strategy);
Status ProfileStrategyFromName(const string& name, ProfileStrategy* strategy);
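
// Example usage (a minimal sketch, not part of this header). `strategy_name`
// stands for a hypothetical, user-provided configuration value; the exact
// spellings accepted by ProfileStrategyFromName are defined in the
// corresponding .cc file:
//
//   ProfileStrategy strategy;
//   Status status = ProfileStrategyFromName(strategy_name, &strategy);
//   if (!status.ok()) {
//     LOG(WARNING) << "Unknown profile strategy '" << strategy_name
//                  << "', defaulting to "
//                  << ProfileStrategyToName(ProfileStrategy::kRange);
//     strategy = ProfileStrategy::kRange;
//   }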

}  // namespace tensorrt
}  // namespace tensorflow

#endif  // GOOGLE_CUDA && GOOGLE_TENSORRT
#endif  // TENSORFLOW_COMPILER_TF2TENSORRT_TRT_PARAMETERS_H_