/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.h"

#include <string>
#include <vector>

#include "tensorflow/compiler/tf2tensorrt/common/utils.h"
#include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_allocator.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_execution_context.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/profiler/lib/traceme.h"

#if GOOGLE_CUDA && GOOGLE_TENSORRT
#include "third_party/tensorrt/NvInfer.h"

namespace tensorflow {
namespace tensorrt {

using absl::StrCat;

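// Creates an execution context for the given engine. By default the context is
// created with createExecutionContextWithoutDeviceMemory(), so the caller is
// expected to supply the device scratch memory before enqueueing work. For
// engines built with TensorRT < 8.0 that have INT32 outputs, a regular context
// that allocates its own device memory is created instead as a workaround; see
// the TODO(nvbugs/3390469) below.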
ExecutionContext ExecutionContext::Create(nvinfer1::ICudaEngine* cuda_engine) {
  bool has_int32_output = false;
  for (int i = 0; i < cuda_engine->getNbBindings(); i++) {
    if (!cuda_engine->bindingIsInput(i) &&
        cuda_engine->getBindingDataType(i) == nvinfer1::DataType::kINT32) {
      has_int32_output = true;
      break;
    }
  }
  if (!IS_TRT_VERSION_GE(8, 0, 0, 0) && has_int32_output) {
    // TODO(nvbugs/3390469): Remove this workaround when the bug is fixed.
    nvinfer1::IExecutionContext* execution_context =
        cuda_engine->createExecutionContext();
    return ExecutionContext(execution_context, true);
  }

  nvinfer1::IExecutionContext* execution_context =
      cuda_engine->createExecutionContextWithoutDeviceMemory();
  return ExecutionContext(execution_context, false);
}

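// Returns in `shape` the TensorShape of the binding at `binding_index`. In
// implicit batch mode the dimensions are read from the engine and `batch_size`
// is added as the leading dimension; in explicit batch (dynamic shape) mode
// the dimensions are read from the execution context, which requires the
// optimization profile to be set.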
Status GetTrtBindingShape(const nvinfer1::ICudaEngine* cuda_engine,
                          const nvinfer1::IExecutionContext* execution_context,
                          int binding_index, bool use_implicit_batch,
                          int batch_size, TensorShape& shape) {
  tensorflow::profiler::TraceMe activity(
      "getBindingDimensions", tensorflow::profiler::TraceMeLevel::kInfo);
  nvinfer1::Dims dims =
      use_implicit_batch
          ? cuda_engine->getBindingDimensions(binding_index)
          : execution_context->getBindingDimensions(binding_index);
  if (!use_implicit_batch) {
    if (dims.nbDims == -1) {
      return errors::Internal(
          "Binding index out of range. This can happen if profile is not set, "
          "or the network is invalid for the current profile.");
    }
  }
  TF_RETURN_IF_ERROR(DimsAdapter(dims).TensorShape(
      &shape,
      use_implicit_batch ? std::optional<int>(batch_size) : std::nullopt));
  return Status::OK();
}

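// Stores the data pointer of `tensor` in `buffers[binding_index]`, based on
// the data type that the engine expects for this binding. INT8 bindings are
// rejected, and kBOOL is handled only for TensorRT >= 8.2.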
Status SetupBindings(nvinfer1::ICudaEngine* cuda_engine, const Tensor& tensor,
                     std::vector<void*>& buffers, int binding_index) {
  tensorflow::profiler::TraceMe activity(
      "SetBindingPointers", tensorflow::profiler::TraceMeLevel::kInfo);
  const auto dtype = cuda_engine->getBindingDataType(binding_index);
  VLOG(2) << "<<<<<<<<< SetupBindings with dtype = " << (int)dtype;
  switch (dtype) {
    case nvinfer1::DataType::kFLOAT:
      buffers[binding_index] = const_cast<float*>(tensor.flat<float>().data());
      break;
    case nvinfer1::DataType::kHALF:
      buffers[binding_index] =
          const_cast<Eigen::half*>(tensor.flat<Eigen::half>().data());
      break;
    case nvinfer1::DataType::kINT8:
      return errors::Internal("INT8 inputs are not supported yet!");
    case nvinfer1::DataType::kINT32:
      buffers[binding_index] = const_cast<int32*>(tensor.flat<int32>().data());
      break;
#if IS_TRT_VERSION_GE(8, 2, 0, 0)
    case nvinfer1::DataType::kBOOL:
      buffers[binding_index] = const_cast<bool*>(tensor.flat<bool>().data());
      break;
#endif
    default:
      return errors::Internal("Unknown TRT data type: ",
                              static_cast<int>(dtype));
  }
  return Status::OK();
}

// Sets up the input bindings of the engine: resolves the binding index for
// each input tensor, checks batch-size consistency in implicit batch mode,
// sets shape bindings and dynamic binding dimensions on the execution context
// in explicit batch mode, and records each input's data pointer in `buffers`.
Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine,
                          nvinfer1::IExecutionContext* execution_context,
                          const int trt_profile_idx,
                          std::vector<void*>& buffers, bool use_implicit_batch,
                          int num_batch,
                          const TrtShapeOptimizationProfile& profiles,
                          OpKernelContext* ctx, const DataVec* input_vec) {
  tensorflow::profiler::TraceMe activity(
      "SetTrtEngineInputs", tensorflow::profiler::TraceMeLevel::kInfo);
  int n_inputs = ctx ? ctx->num_inputs() : (input_vec ? input_vec->size() : 0);
  // Setup engine inputs.
  for (int i = 0; i < n_inputs; i++) {
    const Tensor& input_tensor = ctx ? ctx->input(i) : input_vec->at(i).tensor;
    const TensorShape& input_shape = input_tensor.shape();

    // Skip resource inputs.
    if (input_tensor.dtype() == DataType::DT_RESOURCE) {
      continue;
    }

    const string input_name =
        ctx ? StrCat(IONamePrefixes::kInputPHName, i) : input_vec->at(i).name;
    int binding_index;
    Status status = GetTrtBindingIndex(input_name.c_str(), trt_profile_idx,
                                       cuda_engine, &binding_index);
    if (IS_TRT_VERSION_GE(8, 0, 0, 0)) {
      TF_RETURN_IF_ERROR(status);
    } else if (!status.ok()) {
      // Before TRT 8, an input tensor can be pruned if it is not used by the
      // network (e.g. only its shape is used, but the shape is already defined
      // by the optimization profile by setting min=max). nvbugs/3153064
      VLOG(2) << "Skipping pruned input " << input_name;
      continue;
    }

    if (use_implicit_batch && ctx) {
      // Ensure all inputs have the same batch size
      if (num_batch != input_shape.dim_size(0)) {
        const string msg =
            StrCat("Input data has inconsistent batch size: ", num_batch,
                   " vs ", input_shape.dim_size(0));
        return errors::NotFound(msg);
      }
    }
    // Set known input dimensions. This is necessary because the TRT network
    // could have been built with dynamic dimensions.
    if (!use_implicit_batch) {
      TF_RETURN_IF_ERROR(profiles.SetInputShapeBinding(
          i, binding_index, cuda_engine, execution_context));

      if (cuda_engine->isExecutionBinding(binding_index)) {
        tensorflow::profiler::TraceMe activity(
            "SetTrtEngineInputs::setBindingDimensions",
            tensorflow::profiler::TraceMeLevel::kInfo);
        auto adap = DimsAdapter::Create(input_shape);
        TRT_ENSURE_OK(adap);
        nvinfer1::Dims trt_dims = adap->AsTrtDims();
        if (execution_context->getBindingDimensions(binding_index) !=
            trt_dims) {
          VLOG(2) << "Setting binding dimensions for idx " << binding_index;
          bool ret =
              execution_context->setBindingDimensions(binding_index, trt_dims);
          if (!ret) {
            VLOG(2) << "Error setting engine input " << binding_index << " "
                    << DebugString(trt_dims);
            return errors::Internal(
                "Binding dimension does not fit selected profile.");
          }
        }
      }
    }
    // Setup input bindings.
    TF_RETURN_IF_ERROR(
        SetupBindings(cuda_engine, input_tensor, buffers, binding_index));
  }

  // Ensure all network dynamic dimensions (if any) are set in execution
  // context.
  if (!execution_context->allInputDimensionsSpecified()) {
    return errors::Internal(
        "Failed to set dimensions for all dynamic input tensors");
  }
  if (!execution_context->allInputShapesSpecified()) {
    return errors::Internal(
        "Failed to set dimensions for all shape input tensors.");
  }
  return Status::OK();
}

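// Sets up the output bindings of the engine: resolves the binding index for
// each output, queries the output shape from TensorRT, allocates the
// corresponding TRTEngineOp output (or reshapes the preallocated tensor in
// `outputs` when `ctx` is null, as used in unit tests), and records each
// output's data pointer in `buffers`.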
Status SetTrtEngineOutputs(nvinfer1::ICudaEngine* cuda_engine,
                           nvinfer1::IExecutionContext* execution_context,
                           int trt_profile_idx, std::vector<void*>& buffers,
                           bool use_implicit_batch, int batch_size,
                           OpKernelContext* ctx, DataVec* outputs) {
  tensorflow::profiler::TraceMe activity(
      "SetTrtEngineOutputs", tensorflow::profiler::TraceMeLevel::kInfo);
  // Either one of ctx or outputs should be specified.
  int n_outputs = ctx ? ctx->num_outputs() : (outputs ? outputs->size() : 0);
  for (int i = 0; i < n_outputs; i++) {
    const string output_name =
        ctx ? StrCat(IONamePrefixes::kOutputPHName, i) : outputs->at(i).name;
    int binding_index;
    TF_RETURN_IF_ERROR(GetTrtBindingIndex(output_name.c_str(), trt_profile_idx,
                                          cuda_engine, &binding_index));

    // Get TRT output shapes for allocating output memory.
    TensorShape output_shape;
    TF_RETURN_IF_ERROR(GetTrtBindingShape(cuda_engine, execution_context,
                                          binding_index, use_implicit_batch,
                                          batch_size, output_shape));

    // Allocate output tensor of TRTEngineOp.
    Tensor* output_tensor = nullptr;
    if (ctx) {
      tensorflow::profiler::TraceMe activity(
          "AllocateOutput", tensorflow::profiler::TraceMeLevel::kInfo);
      TF_RETURN_IF_ERROR(ctx->allocate_output(i, output_shape, &output_tensor));
    } else {
      // This path is used for unit tests. The tensor is already allocated, but
      // its shape is not necessarily set correctly, so we fix that here.
      VLOG(2) << "Applying shape " << output_shape.DebugString()
              << " on output.";
      output_tensor = &(outputs->at(i).tensor);
      bool status = output_tensor->CopyFrom(*output_tensor, output_shape);
      if (!status) {
        return errors::Internal(
            "Buffer size (", output_tensor->NumElements(),
            ") does not match while reshaping the output tensor to shape ",
            output_shape.DebugString());
      }
    }

    // Set up output bindings.
    TF_RETURN_IF_ERROR(
        SetupBindings(cuda_engine, *output_tensor, buffers, binding_index));
  }
  return Status::OK();
}

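// Enqueues the inference work on `stream`, using enqueue() in implicit batch
// mode and enqueueV2() otherwise. The call is asynchronous; stream
// synchronization is left to TensorFlow.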
Status TrtEnqueue(nvinfer1::IExecutionContext* execution_context,
                  std::vector<void*>& buffers, cudaStream_t stream,
                  bool use_implicit_batch, int batch_size) {
  tensorflow::profiler::TraceMe activity(
      "TrtEnqueue", tensorflow::profiler::TraceMeLevel::kInfo);
  bool ret = false;
  if (use_implicit_batch) {
    ret = execution_context->enqueue(batch_size, &buffers[0], stream, nullptr);
    VLOG(1) << "Called IExecutionContext::enqueue";
  } else {
    ret = execution_context->enqueueV2(&buffers[0], stream, nullptr);
    VLOG(1) << "Called IExecutionContext::enqueueV2";
  }
  if (!ret) {
    return errors::Internal("Failed to enqueue batch for TRT engine");
  }
  // Synchronization will be done by TF.
  return Status::OK();
}

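// Illustrative usage sketch (comment only, not compiled): the helpers above
// are typically called in this order, e.g. by TRTEngineOp. The `engine`,
// `exec_ctx` (a raw nvinfer1::IExecutionContext*), `profiles`, `profile_idx`,
// `ctx`, and `stream` variables are assumed to be set up by the caller.
//
//   std::vector<void*> buffers(engine->getNbBindings());
//   TF_RETURN_IF_ERROR(SetTrtEngineInputs(engine, exec_ctx, profile_idx,
//                                         buffers, /*use_implicit_batch=*/false,
//                                         /*num_batch=*/0, profiles, ctx,
//                                         /*input_vec=*/nullptr));
//   TF_RETURN_IF_ERROR(SetTrtEngineOutputs(engine, exec_ctx, profile_idx,
//                                          buffers, /*use_implicit_batch=*/false,
//                                          /*batch_size=*/0, ctx,
//                                          /*outputs=*/nullptr));
//   TF_RETURN_IF_ERROR(TrtEnqueue(exec_ctx, buffers, stream,
//                                 /*use_implicit_batch=*/false,
//                                 /*batch_size=*/0));
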
}  // namespace tensorrt
}  // namespace tensorflow

#endif  // GOOGLE_CUDA && GOOGLE_TENSORRT