/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA && GOOGLE_TENSORRT
#include <functional>
#include <numeric>
#include <stack>

#include "third_party/gpus/cuda/include/cuda.h"
#include "third_party/gpus/cuda/include/cuda_runtime_api.h"
#include "tensorflow/compiler/tf2tensorrt/common/utils.h"
#include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h"
#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/stream_executor.h"
#include "tensorflow/core/platform/test.h"
#include "third_party/tensorrt/NvInfer.h"
#include "third_party/tensorrt/NvInferPlugin.h"
#include "third_party/tensorrt/NvInferRuntimeCommon.h"

#ifdef TF_TRT_USE_EFFICIENT_NMS_PLUGIN
#include "third_party/tensorrt/plugin/efficientNMSPlugin/efficientNMSPlugin.h"
namespace tensorflow {
namespace tensorrt {
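// Builds an EfficientNMSPluginCreator in the given plugin namespace. The same
// helper is used below both to create the plugin layer for the test network
// and to register the creator so the serialized engine can be deserialized.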
std::unique_ptr<nvinfer1::plugin::EfficientNMSPluginCreator>
MakeNMSPluginCreator(const std::string& plugin_namespace = "tftrt") {
  auto pluginCreator =
      std::make_unique<nvinfer1::plugin::EfficientNMSPluginCreator>();
  pluginCreator->setPluginNamespace(plugin_namespace.c_str());
  std::string pluginType = std::string{pluginCreator->getPluginNamespace()} +
                           "::" + std::string{pluginCreator->getPluginName()} +
                           " version " +
                           std::string{pluginCreator->getPluginVersion()};
  VLOG(0) << "Created plugin type " << pluginType;
  return pluginCreator;
}

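// TensorRT plugin instances must be released with destroy() rather than
// operator delete; this deleter lets std::unique_ptr own IPluginV2 objects.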
struct PluginDeleter {
  void operator()(nvinfer1::IPluginV2* t);
};

void PluginDeleter::operator()(nvinfer1::IPluginV2* t) { t->destroy(); }

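// Packs the given fields into a PluginFieldCollection and asks the creator to
// instantiate a plugin with them. Returns nullptr if no creator is supplied.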
std::unique_ptr<nvinfer1::IPluginV2, PluginDeleter> createPlugin(
    const std::string& name, nvinfer1::IPluginCreator* pluginCreator,
    const std::vector<nvinfer1::PluginField>& pluginFields) {
  if (!pluginCreator) {
    return nullptr;
  }
  nvinfer1::PluginFieldCollection fc;
  fc.nbFields = pluginFields.size();
  fc.fields = pluginFields.data();
  return std::unique_ptr<nvinfer1::IPluginV2, PluginDeleter>{
      pluginCreator->createPlugin(name.c_str(), &fc)};
}
}  // namespace tensorrt
}  // namespace tensorflow
#endif  // TF_TRT_USE_EFFICIENT_NMS_PLUGIN

namespace tensorflow {
namespace tensorrt {

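// Owns a single float and exposes it as a one-element nvinfer1::Weights.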
class ScopedWeights {
 public:
  ScopedWeights(float value) : value_(value) {
    w.type = nvinfer1::DataType::kFLOAT;
    w.values = &value_;
    w.count = 1;
  }
  const nvinfer1::Weights& get() { return w; }

 private:
  float value_;
  nvinfer1::Weights w;
};

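// Owns a float buffer shaped like `dims`, filled with `value`, and exposes it
// as nvinfer1::Weights for use in constant layers.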
class ScopedShapedWeights {
 public:
  ScopedShapedWeights(nvinfer1::Dims dims, float value)
      : dims_(dims),
        value_(std::accumulate(dims.d, dims.d + dims.nbDims, 1,
                               std::multiplies<>()),
               value) {
    w.type = nvinfer1::DataType::kFLOAT;
    w.values = value_.data();
    w.count = value_.size();
  }

  nvinfer1::Dims dims_;
  std::vector<float> value_;
  nvinfer1::Weights w;
};

const char* kInputTensor1 = "input1";
const char* kInputTensor2 = "input2";
const char* kOutputTensor1 = "output";
const char* kOutputTensor2 = "output-nms";

// Creates a network with two outputs: the first computes x+y, and the second
// is either an EfficientNMS plugin output or x-y, depending on whether
// TF_TRT_USE_EFFICIENT_NMS_PLUGIN is defined. Returns the serialized engine.
TrtUniquePtrType<nvinfer1::IHostMemory> CreateSerializedEngine() {
  Logger& logger = *Logger::GetLogger();
  TrtUniquePtrType<nvinfer1::IBuilder> builder(
      nvinfer1::createInferBuilder(logger));
  TrtUniquePtrType<nvinfer1::INetworkDefinition> network(
      builder->createNetworkV2(
          1U << static_cast<uint32_t>(
              nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)));
  // Add the inputs.
  auto input1 = network->addInput(kInputTensor1, nvinfer1::DataType::kFLOAT,
                                  nvinfer1::Dims4{1, 1, 1, 1});
  auto input2 = network->addInput(kInputTensor2, nvinfer1::DataType::kFLOAT,
                                  nvinfer1::Dims4{1, 1, 1, 1});
  EXPECT_NE(input1, nullptr);
  EXPECT_NE(input2, nullptr);
  // Add an elementwise addition layer and mark its output.
  auto layer = network->addElementWise(*input1, *input2,
                                       nvinfer1::ElementWiseOperation::kSUM);
  EXPECT_NE(layer, nullptr);
  auto output = layer->getOutput(0);
  output->setName(kOutputTensor1);
  network->markOutput(*output);

#ifdef TF_TRT_USE_EFFICIENT_NMS_PLUGIN
  // Add an EfficientNMS plugin fed by constant box and score tensors.
  ScopedShapedWeights boxes_weights(nvinfer1::Dims3(1, 10, 4), 0.0f);
  ScopedShapedWeights scores_weights(nvinfer1::Dims3(1, 10, 10), 0.0f);
  nvinfer1::IConstantLayer* boxes =
      network->addConstant(boxes_weights.dims_, boxes_weights.w);
  nvinfer1::IConstantLayer* scores =
      network->addConstant(scores_weights.dims_, scores_weights.w);

  std::array<nvinfer1::ITensor*, 2> nms_inputs = {boxes->getOutput(0),
                                                  scores->getOutput(0)};
  auto plugin_creator = MakeNMSPluginCreator("tftrt");
  auto plugin = createPlugin("nms_plugin_instance", plugin_creator.get(), {});
  auto nms = network->addPluginV2(nms_inputs.data(), 2, *plugin);
  nms->getOutput(0)->setName(kOutputTensor2);
  network->markOutput(*nms->getOutput(0));
#else
  // Without the plugin, use x-y as the second output instead.
  auto sub_layer = network->addElementWise(
      *input1, *input2, nvinfer1::ElementWiseOperation::kSUB);
  EXPECT_NE(sub_layer, nullptr);
  network->markOutput(*sub_layer->getOutput(0));
  sub_layer->getOutput(0)->setName(kOutputTensor2);
#endif

  // Build the engine.
  builder->setMaxBatchSize(1);
  TrtUniquePtrType<nvinfer1::IBuilderConfig> builderConfig(
      builder->createBuilderConfig());
  builderConfig->setMaxWorkspaceSize(1 << 20);
  TrtUniquePtrType<nvinfer1::ICudaEngine> engine(
      builder->buildEngineWithConfig(*network, *builderConfig));
  EXPECT_NE(engine, nullptr);
  // Serialize the engine to create a model, then close everything.
  TrtUniquePtrType<nvinfer1::IHostMemory> model(engine->serialize());
  return model;
}

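// Returns the size in bytes of the buffer needed for binding `index`,
// accounting for vectorized formats where one dimension is padded up to a
// multiple of the number of scalars per vector.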
template <typename T>
unsigned GetBindingSizeBytes(const nvinfer1::ICudaEngine& engine, int index,
                             unsigned batch_size) {
  unsigned vol = batch_size;
  auto dims = engine.getBindingDimensions(index);
  int vecDim = engine.getBindingVectorizedDim(index);
  if (-1 != vecDim)  // i.e., 0 != lgScalarsPerVector
  {
    int scalarsPerVec = engine.getBindingComponentsPerElement(index);
    // Divide, rounding up.
    dims.d[vecDim] = (dims.d[vecDim] + scalarsPerVec - 1) / scalarsPerVec;
    vol *= scalarsPerVec;
  }
  vol *= std::accumulate(dims.d, dims.d + dims.nbDims, 1, std::multiplies<>());
  return vol * sizeof(T);
}


// Copies the two scalar inputs to the GPU, runs the engine on the given
// execution context, and copies both outputs back to the host.
void Execute(nvinfer1::IExecutionContext* context, const float* input1,
             const float* input2, float* output1, float* output2) {
  const nvinfer1::ICudaEngine& engine = context->getEngine();

  // We have four bindings: two inputs and two outputs.
  ASSERT_EQ(engine.getNbBindings(), 4);
  const int input_index1 = engine.getBindingIndex(kInputTensor1);
  const int input_index2 = engine.getBindingIndex(kInputTensor2);
  const int output_index1 = engine.getBindingIndex(kOutputTensor1);
  const int output_index2 = engine.getBindingIndex(kOutputTensor2);


  // Create GPU buffers and a stream.
  std::vector<void*> buffers(engine.getNbBindings());
  for (int i = 0; i < buffers.size(); i++) {
    ASSERT_EQ(
        0, cudaMalloc(&buffers[i], GetBindingSizeBytes<float>(engine, i, 1)));
  }

  cudaStream_t stream;
  ASSERT_EQ(0, cudaStreamCreate(&stream));

  // Copy the input to the GPU, execute the network, and copy the output back.
  //
  // Note that since the host buffer was not created as pinned memory, these
  // async copies are turned into sync copies. So the following synchronization
  // could be removed.
  ASSERT_EQ(0, cudaMemcpyAsync(buffers[input_index1], input1, sizeof(float),
                               cudaMemcpyHostToDevice, stream));
  ASSERT_EQ(0, cudaMemcpyAsync(buffers[input_index2], input2, sizeof(float),
                               cudaMemcpyHostToDevice, stream));
  context->enqueueV2(buffers.data(), stream, nullptr);
  ASSERT_EQ(0, cudaMemcpyAsync(output1, buffers[output_index1], sizeof(float),
                               cudaMemcpyDeviceToHost, stream));
  ASSERT_EQ(
      0, cudaMemcpyAsync(output2, buffers[output_index2],
                         GetBindingSizeBytes<int32>(engine, output_index2, 1),
                         cudaMemcpyDeviceToHost, stream));
  cudaStreamSynchronize(stream);

  // Release the stream and the buffers.
  for (int i = 0; i < buffers.size(); i++) {
    ASSERT_EQ(0, cudaFree(buffers[i]));
  }
  cudaStreamDestroy(stream);
}

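// End-to-end smoke test: builds and serializes an engine, deserializes it,
// runs inference, and checks both outputs.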
TEST(TensorrtTest, BasicFunctions) {
  // We must register the plugin creator in order to deserialize the plugin.
#ifdef TF_TRT_USE_EFFICIENT_NMS_PLUGIN
  auto plugin_creator = MakeNMSPluginCreator("tftrt");
  getPluginRegistry()->registerCreator(*plugin_creator, "tftrt");
#endif

  // Handle the case where the test is run on a machine with no GPU available.
  if (CHECK_NOTNULL(GPUMachineManager())->VisibleDeviceCount() <= 0) {
    LOG(WARNING) << "No gpu device available, probably not being run on a gpu "
                    "machine. Skipping...";
    return;
  }

  // Create a serialized engine.
  TrtUniquePtrType<nvinfer1::IHostMemory> model = CreateSerializedEngine();
  // Use the model to create an engine and then an execution context.
  Logger& logger = *Logger::GetLogger();
  TrtUniquePtrType<nvinfer1::IRuntime> runtime(
      nvinfer1::createInferRuntime(logger));
  TrtUniquePtrType<nvinfer1::ICudaEngine> engine(
      runtime->deserializeCudaEngine(model->data(), model->size(), nullptr));
  TrtUniquePtrType<nvinfer1::IExecutionContext> context(
      engine->createExecutionContext());

  // Execute the network.
  float input1 = 1234;
  float input2 = 567;

  std::vector<float> output1(
      GetBindingSizeBytes<float>(*engine, 2, 1) / sizeof(float), 0.0f);

  std::vector<float> output2(
      GetBindingSizeBytes<int32>(*engine, 3, 1) / sizeof(int32), 0.0f);

  ASSERT_EQ(output1.size(), 1);
  ASSERT_EQ(output2.size(), 1);

  Execute(context.get(), &input1, &input2, output1.data(), output2.data());
  EXPECT_EQ(output1[0], input1 + input2);

#ifdef TF_TRT_USE_EFFICIENT_NMS_PLUGIN
  EXPECT_EQ(output2[0], 0);
#else
  EXPECT_EQ(output2[0], 667);
#endif  // TF_TRT_USE_EFFICIENT_NMS_PLUGIN
}

}  // namespace tensorrt
}  // namespace tensorflow

#endif  // GOOGLE_CUDA && GOOGLE_TENSORRT