1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.h"
17
18 #include <string>
19 #include <vector>
20
21 #include "tensorflow/compiler/tf2tensorrt/common/utils.h"
22 #include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
23 #include "tensorflow/compiler/tf2tensorrt/utils/trt_allocator.h"
24 #include "tensorflow/compiler/tf2tensorrt/utils/trt_execution_context.h"
25 #include "tensorflow/core/framework/tensor.h"
26 #include "tensorflow/core/framework/tensor_shape.h"
27 #include "tensorflow/core/lib/core/status.h"
28 #include "tensorflow/core/platform/errors.h"
29 #include "tensorflow/core/profiler/lib/traceme.h"
30
31 #if GOOGLE_CUDA && GOOGLE_TENSORRT
32 #include "third_party/tensorrt/NvInfer.h"
33
34 namespace tensorflow {
35 namespace tensorrt {
36
37 using absl::StrCat;
38
Create(nvinfer1::ICudaEngine * cuda_engine)39 ExecutionContext ExecutionContext::Create(nvinfer1::ICudaEngine* cuda_engine) {
40 bool has_int32_output = false;
41 for (int i = 0; i < cuda_engine->getNbBindings(); i++) {
42 if (!cuda_engine->bindingIsInput(i) &&
43 cuda_engine->getBindingDataType(i) == nvinfer1::DataType::kINT32) {
44 has_int32_output = true;
45 break;
46 }
47 }
48 if (!IS_TRT_VERSION_GE(8, 0, 0, 0) && has_int32_output) {
49 // TODO(nvbugs/3390469): Remove this workaround when the bug is fixed.
50 nvinfer1::IExecutionContext* execution_context =
51 cuda_engine->createExecutionContext();
52 return ExecutionContext(execution_context, true);
53 }
54
55 nvinfer1::IExecutionContext* execution_context =
56 cuda_engine->createExecutionContextWithoutDeviceMemory();
57 return ExecutionContext(execution_context, false);
58 }
59
GetTrtBindingShape(const nvinfer1::ICudaEngine * cuda_engine,const nvinfer1::IExecutionContext * execution_context,int binding_index,bool use_implicit_batch,int batch_size,TensorShape & shape)60 Status GetTrtBindingShape(const nvinfer1::ICudaEngine* cuda_engine,
61 const nvinfer1::IExecutionContext* execution_context,
62 int binding_index, bool use_implicit_batch,
63 int batch_size, TensorShape& shape) {
64 tensorflow::profiler::TraceMe activity(
65 "getBindingDimensions", tensorflow::profiler::TraceMeLevel::kInfo);
66 nvinfer1::Dims dims =
67 use_implicit_batch
68 ? cuda_engine->getBindingDimensions(binding_index)
69 : execution_context->getBindingDimensions(binding_index);
70 if (!use_implicit_batch) {
71 if (dims.nbDims == -1) {
72 return errors::Internal(
73 "Binding index out of range. This can happen if profile is not set, "
74 "or the network is invalid for the current profile.");
75 }
76 }
77 TF_RETURN_IF_ERROR(DimsAdapter(dims).TensorShape(
78 &shape,
79 use_implicit_batch ? std::optional<int>(batch_size) : std::nullopt));
80 return Status::OK();
81 }
82
SetupBindings(nvinfer1::ICudaEngine * cuda_engine,const Tensor & tensor,std::vector<void * > & buffers,int binding_index)83 Status SetupBindings(nvinfer1::ICudaEngine* cuda_engine, const Tensor& tensor,
84 std::vector<void*>& buffers, int binding_index) {
85 tensorflow::profiler::TraceMe activity(
86 "SetBindingPointers", tensorflow::profiler::TraceMeLevel::kInfo);
87 const auto dtype = cuda_engine->getBindingDataType(binding_index);
88 VLOG(2) << "<<<<<<<<< SetupBindings with dtype = " << (int)dtype;
89 switch (dtype) {
90 case nvinfer1::DataType::kFLOAT:
91 buffers[binding_index] = const_cast<float*>(tensor.flat<float>().data());
92 break;
93 case nvinfer1::DataType::kHALF:
94 buffers[binding_index] =
95 const_cast<Eigen::half*>(tensor.flat<Eigen::half>().data());
96 break;
97 case nvinfer1::DataType::kINT8:
98 return errors::Internal("INT8 inputs are not supported yet!");
99 case nvinfer1::DataType::kINT32:
100 buffers[binding_index] = const_cast<int32*>(tensor.flat<int32>().data());
101 break;
102 #if IS_TRT_VERSION_GE(8, 2, 0, 0)
103 case nvinfer1::DataType::kBOOL:
104 buffers[binding_index] = const_cast<bool*>(tensor.flat<bool>().data());
105 break;
106 #endif
107 default:
108 return errors::Internal("Unknown TRT data type: ",
109 static_cast<int>(dtype));
110 }
111 return Status::OK();
112 }
113
// Sets up the TRT input bindings: validates input shapes, sets them on the
// execution context where needed, and records each input tensor's data
// pointer in `buffers`. Inputs come either from `ctx` (the kernel path) or
// from `input_vec` (the unit-test path); if both are null there is nothing
// to bind.
Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine,
                          nvinfer1::IExecutionContext* execution_context,
                          const int trt_profile_idx,
                          std::vector<void*>& buffers, bool use_implicit_batch,
                          int num_batch,
                          const TrtShapeOptimizationProfile& profiles,
                          OpKernelContext* ctx, const DataVec* input_vec) {
  tensorflow::profiler::TraceMe activity(
      "SetTrtEngineInputs", tensorflow::profiler::TraceMeLevel::kInfo);
  int n_inputs = ctx ? ctx->num_inputs() : (input_vec ? input_vec->size() : 0);
  // Setup engine inputs.
  for (int i = 0; i < n_inputs; i++) {
    const Tensor& input_tensor = ctx ? ctx->input(i) : input_vec->at(i).tensor;
    const TensorShape& input_shape = input_tensor.shape();

    // Skip resource inputs.
    if (input_tensor.dtype() == DataType::DT_RESOURCE) {
      continue;
    }

    // Kernel inputs are named by a fixed placeholder prefix plus the input
    // index; test inputs carry explicit names.
    const string input_name =
        ctx ? StrCat(IONamePrefixes::kInputPHName, i) : input_vec->at(i).name;
    int binding_index;
    Status status = GetTrtBindingIndex(input_name.c_str(), trt_profile_idx,
                                       cuda_engine, &binding_index);
    if (IS_TRT_VERSION_GE(8, 0, 0, 0)) {
      TF_RETURN_IF_ERROR(status);
    } else if (!status.ok()) {
      // Before TRT 8, an input tensor can be pruned if it is not used by the
      // network (e.g. only its shape is used, but the shape is already defined
      // by the optimization profile by setting min=max). nvbugs/3153064
      VLOG(2) << "Skipping pruned input " << input_name;
      continue;
    }

    if (use_implicit_batch && ctx) {
      // Ensure all inputs have the same batch size
      if (num_batch != input_shape.dim_size(0)) {
        const string msg =
            StrCat("Input data has inconsistent batch size: ", num_batch,
                   " vs ", input_shape.dim_size(0));
        return errors::NotFound(msg);
      }
    }
    // Set known input dimensions. This is necessary because TRT network
    // could be made with dynamic dimensions.
    if (!use_implicit_batch) {
      // Shape-tensor inputs are handled by the optimization profile helper;
      // execution (data) bindings get their dimensions set below.
      TF_RETURN_IF_ERROR(profiles.SetInputShapeBinding(
          i, binding_index, cuda_engine, execution_context));

      if (cuda_engine->isExecutionBinding(binding_index)) {
        tensorflow::profiler::TraceMe activity(
            "SetTrtEngineInputs::setBindingDimensions",
            tensorflow::profiler::TraceMeLevel::kInfo);
        auto adap = DimsAdapter::Create(input_shape);
        TRT_ENSURE_OK(adap);
        nvinfer1::Dims trt_dims = adap->AsTrtDims();
        // Skip the setBindingDimensions call when the context already has
        // the desired dimensions.
        if (execution_context->getBindingDimensions(binding_index) !=
            trt_dims) {
          VLOG(2) << "Setting binding dimensions for idx " << binding_index;
          bool ret =
              execution_context->setBindingDimensions(binding_index, trt_dims);
          if (!ret) {
            VLOG(2) << "Error setting engine input " << binding_index << " "
                    << DebugString(trt_dims);
            return errors::Internal(
                "Binding dimension does not fit selected profile.");
          }
        }
      }
    }
    // Setup input bindings.
    TF_RETURN_IF_ERROR(
        SetupBindings(cuda_engine, input_tensor, buffers, binding_index));
  }

  // Ensure all network dynamic dimensions (if any) are set in execution
  // context.
  if (!execution_context->allInputDimensionsSpecified()) {
    return errors::Internal(
        "Failed to set dimensions for all dynamic input tensors");
  }
  if (!execution_context->allInputShapesSpecified()) {
    return errors::Internal(
        "Failed to set dimensions for all shape input tensors.");
  }
  return Status::OK();
}
203
// Sets up the TRT output bindings: queries each output's shape from the
// engine/context, allocates the output tensor (kernel path) or reshapes a
// pre-allocated one (test path), and records its data pointer in `buffers`.
Status SetTrtEngineOutputs(nvinfer1::ICudaEngine* cuda_engine,
                           nvinfer1::IExecutionContext* execution_context,
                           int trt_profile_idx, std::vector<void*>& buffers,
                           bool use_implicit_batch, int batch_size,
                           OpKernelContext* ctx, DataVec* outputs) {
  tensorflow::profiler::TraceMe activity(
      "SetTrtEngineOutputs", tensorflow::profiler::TraceMeLevel::kInfo);
  // Either one of ctx or outputs should be specified
  int n_outputs = ctx ? ctx->num_outputs() : (outputs ? outputs->size() : 0);
  for (int i = 0; i < n_outputs; i++) {
    const string output_name =
        ctx ? StrCat(IONamePrefixes::kOutputPHName, i) : outputs->at(i).name;
    int binding_index;
    TF_RETURN_IF_ERROR(GetTrtBindingIndex(output_name.c_str(), trt_profile_idx,
                                          cuda_engine, &binding_index));

    // Get TRT output shapes for allocating output memory.
    TensorShape output_shape;
    TF_RETURN_IF_ERROR(GetTrtBindingShape(cuda_engine, execution_context,
                                          binding_index, use_implicit_batch,
                                          batch_size, output_shape));

    // Allocate output tensor of TRTEngineOp.
    Tensor* output_tensor = nullptr;
    if (ctx) {
      tensorflow::profiler::TraceMe activity(
          "AllocateOutput", tensorflow::profiler::TraceMeLevel::kInfo);
      TF_RETURN_IF_ERROR(ctx->allocate_output(i, output_shape, &output_tensor));
    } else {
      // This path is used for unit tests. The tensor is already allocated.
      // Its shape is not necessarily set correctly, we fix that.
      VLOG(2) << "Applying shape " << output_shape.DebugString()
              << " on output.";
      output_tensor = &(outputs->at(i).tensor);
      // CopyFrom onto itself shares the buffer and only reinterprets it with
      // `output_shape`; it fails when the element counts do not match.
      bool status = output_tensor->CopyFrom(*output_tensor, output_shape);
      if (!status) {
        return errors::Internal(
            "Buffer size (", output_tensor->NumElements(),
            ") do not match while reshaping output tensors to shape ",
            output_shape.DebugString());
      }
    }

    // Set up output bindings.
    TF_RETURN_IF_ERROR(
        SetupBindings(cuda_engine, *output_tensor, buffers, binding_index));
  }
  return Status::OK();
}
253
TrtEnqueue(nvinfer1::IExecutionContext * execution_context,std::vector<void * > & buffers,cudaStream_t stream,bool use_implicit_batch,int batch_size)254 Status TrtEnqueue(nvinfer1::IExecutionContext* execution_context,
255 std::vector<void*>& buffers, cudaStream_t stream,
256 bool use_implicit_batch, int batch_size) {
257 tensorflow::profiler::TraceMe activity(
258 "TrtEnqueue", tensorflow::profiler::TraceMeLevel::kInfo);
259 bool ret = false;
260 if (use_implicit_batch) {
261 ret = execution_context->enqueue(batch_size, &buffers[0], stream, nullptr);
262 VLOG(1) << "Called IExecutionContext::enqueue";
263 } else {
264 ret = execution_context->enqueueV2(&buffers[0], stream, nullptr);
265 VLOG(1) << "Called IExecutionContext::enqueueV2";
266 }
267 if (!ret) {
268 return errors::Internal("Failed to enqueue batch for TRT engine");
269 }
270 // Synchronization will be done by TF.
271 return Status::OK();
272 }
273
274 } // namespace tensorrt
275 } // namespace tensorflow
276
277 #endif // GOOGLE_CUDA && GOOGLE_TENSORRT
278