xref: /aosp_15_r20/external/tensorflow/tensorflow/compiler/tf2tensorrt/tensorrt_test.cc (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #if GOOGLE_CUDA && GOOGLE_TENSORRT
16 #include <functional>
17 #include <numeric>
18 #include <stack>
19 
20 #include "third_party/gpus/cuda/include/cuda.h"
21 #include "third_party/gpus/cuda/include/cuda_runtime_api.h"
22 #include "tensorflow/compiler/tf2tensorrt/common/utils.h"
23 #include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
24 #include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h"
25 #include "tensorflow/core/common_runtime/gpu/gpu_init.h"
26 #include "tensorflow/core/platform/logging.h"
27 #include "tensorflow/core/platform/stream_executor.h"
28 #include "tensorflow/core/platform/test.h"
29 #include "third_party/tensorrt/NvInfer.h"
30 #include "third_party/tensorrt/NvInferPlugin.h"
31 #include "third_party/tensorrt/NvInferRuntimeCommon.h"
32 
33 #ifdef TF_TRT_USE_EFFICIENT_NMS_PLUGIN
34 #include "third_party/tensorrt/plugin/efficientNMSPlugin/efficientNMSPlugin.h"
35 namespace tensorflow {
36 namespace tensorrt {
37 std::unique_ptr<nvinfer1::plugin::EfficientNMSPluginCreator>
MakeNMSPluginCreator(const std::string & plugin_namespace="tftrt")38 MakeNMSPluginCreator(const std::string& plugin_namespace = "tftrt") {
39   auto pluginCreator =
40       std::make_unique<nvinfer1::plugin::EfficientNMSPluginCreator>();
41   pluginCreator->setPluginNamespace(plugin_namespace.c_str());
42   std::string pluginType = std::string{pluginCreator->getPluginNamespace()} +
43                            "::" + std::string{pluginCreator->getPluginName()} +
44                            " version " +
45                            std::string{pluginCreator->getPluginVersion()};
46   VLOG(0) << "Created plugin type " << pluginType;
47   return pluginCreator;
48 }
49 
50 struct PluginDeleter {
51   void operator()(nvinfer1::IPluginV2* t);
52 };
53 
operator ()(nvinfer1::IPluginV2 * t)54 void PluginDeleter::operator()(nvinfer1::IPluginV2* t) { t->destroy(); }
55 
createPlugin(const std::string & name,nvinfer1::IPluginCreator * pluginCreator,const std::vector<nvinfer1::PluginField> & pluginFields)56 std::unique_ptr<nvinfer1::IPluginV2, PluginDeleter> createPlugin(
57     const std::string& name, nvinfer1::IPluginCreator* pluginCreator,
58     const std::vector<nvinfer1::PluginField>& pluginFields) {
59   if (!pluginCreator) {
60     return nullptr;
61   }
62   nvinfer1::PluginFieldCollection fc;
63   fc.nbFields = pluginFields.size();
64   fc.fields = pluginFields.data();
65   return std::unique_ptr<nvinfer1::IPluginV2, PluginDeleter>{
66       pluginCreator->createPlugin(name.c_str(), &fc)};
67 }
68 }  // namespace tensorrt
69 }  // namespace tensorflow
70 #endif
71 
72 namespace tensorflow {
73 namespace tensorrt {
74 
75 class ScopedWeights {
76  public:
ScopedWeights(float value)77   ScopedWeights(float value) : value_(value) {
78     w.type = nvinfer1::DataType::kFLOAT;
79     w.values = &value_;
80     w.count = 1;
81   }
get()82   const nvinfer1::Weights& get() { return w; }
83 
84  private:
85   float value_;
86   nvinfer1::Weights w;
87 };
88 
89 class ScopedShapedWeights {
90  public:
ScopedShapedWeights(nvinfer1::Dims dims,float value)91   ScopedShapedWeights(nvinfer1::Dims dims, float value)
92       : dims_(dims),
93         value_(std::accumulate(dims.d, dims.d + dims.nbDims, 1,
94                                std::multiplies<>()),
95                value) {
96     w.type = nvinfer1::DataType::kFLOAT;
97     w.values = value_.data();
98     w.count = value_.size();
99   }
100 
101   nvinfer1::Dims dims_;
102   std::vector<float> value_;
103   nvinfer1::Weights w;
104 };
105 
106 const char* kInputTensor1 = "input1";
107 const char* kInputTensor2 = "input2";
108 const char* kOutputTensor1 = "output";
109 const char* kOutputTensor2 = "output-nms";
110 
111 // Creates a network to compute x+y.
CreateSerializedEngine()112 TrtUniquePtrType<nvinfer1::IHostMemory> CreateSerializedEngine() {
113   Logger& logger = *Logger::GetLogger();
114   TrtUniquePtrType<nvinfer1::IBuilder> builder(
115       nvinfer1::createInferBuilder(logger));
116   TrtUniquePtrType<nvinfer1::INetworkDefinition> network(
117       builder->createNetworkV2(
118           1U << static_cast<uint32_t>(
119               nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)));
120   // Add the input.
121   auto input1 = network->addInput(kInputTensor1, nvinfer1::DataType::kFLOAT,
122                                   nvinfer1::Dims4{1, 1, 1, 1});
123   auto input2 = network->addInput(kInputTensor2, nvinfer1::DataType::kFLOAT,
124                                   nvinfer1::Dims4{1, 1, 1, 1});
125   EXPECT_NE(input1, nullptr);
126   EXPECT_NE(input2, nullptr);
127   // Add an ILayer layer.
128   auto layer = network->addElementWise(*input1, *input2,
129                                        nvinfer1::ElementWiseOperation::kSUM);
130   EXPECT_NE(layer, nullptr);
131   auto output = layer->getOutput(0);
132   output->setName(kOutputTensor1);
133   network->markOutput(*output);
134 
135 #ifdef TF_TRT_USE_EFFICIENT_NMS_PLUGIN
136   // Add an efficient nms plugin.
137   ScopedShapedWeights boxes_weights(nvinfer1::Dims3(1, 10, 4), 0.0f);
138   ScopedShapedWeights scores_weights(nvinfer1::Dims3(1, 10, 10), 0.0f);
139   nvinfer1::IConstantLayer* boxes =
140       network->addConstant(boxes_weights.dims_, boxes_weights.w);
141   nvinfer1::IConstantLayer* scores =
142       network->addConstant(scores_weights.dims_, scores_weights.w);
143 
144   std::array<nvinfer1::ITensor*, 2> nms_inputs = {boxes->getOutput(0),
145                                                   scores->getOutput(0)};
146   auto plugin_creator = MakeNMSPluginCreator("tftrt");
147   auto plugin = createPlugin("nms_plugin_instance", plugin_creator.get(), {});
148   auto nms = network->addPluginV2(nms_inputs.data(), 2, *plugin);
149   nms->getOutput(0)->setName(kOutputTensor2);
150   network->markOutput(*nms->getOutput(0));
151 #else
152   auto sub_layer = network->addElementWise(
153       *input1, *input2, nvinfer1::ElementWiseOperation::kSUB);
154   EXPECT_NE(sub_layer, nullptr);
155   network->markOutput(*sub_layer->getOutput(0));
156   sub_layer->getOutput(0)->setName(kOutputTensor2);
157 #endif
158 
159   // Build the engine.
160   builder->setMaxBatchSize(1);
161   TrtUniquePtrType<nvinfer1::IBuilderConfig> builderConfig(
162       builder->createBuilderConfig());
163   builderConfig->setMaxWorkspaceSize(1 << 20);
164   TrtUniquePtrType<nvinfer1::ICudaEngine> engine(
165       builder->buildEngineWithConfig(*network, *builderConfig));
166   EXPECT_NE(engine, nullptr);
167   // Serialize the engine to create a model, then close everything.
168   TrtUniquePtrType<nvinfer1::IHostMemory> model(engine->serialize());
169   return model;
170 }
171 
172 template <typename T>
GetBindingSizeBytes(const nvinfer1::ICudaEngine & engine,int index,unsigned batch_size)173 unsigned GetBindingSizeBytes(const nvinfer1::ICudaEngine& engine, int index,
174                              unsigned batch_size) {
175   unsigned vol = batch_size;
176   auto dims = engine.getBindingDimensions(index);
177   int vecDim = engine.getBindingVectorizedDim(index);
178   if (-1 != vecDim)  // i.e., 0 != lgScalarsPerVector
179   {
180     int scalarsPerVec = engine.getBindingComponentsPerElement(index);
181     // Divide round up.
182     dims.d[vecDim] = (dims.d[vecDim] + scalarsPerVec - 1 / scalarsPerVec);
183     vol *= scalarsPerVec;
184   }
185   vol *= std::accumulate(dims.d, dims.d + dims.nbDims, 1, std::multiplies<>());
186   return vol * sizeof(T);
187 }
188 
189 // Executes the network.
Execute(nvinfer1::IExecutionContext * context,const float * input1,const float * input2,float * output1,float * output2)190 void Execute(nvinfer1::IExecutionContext* context, const float* input1,
191              const float* input2, float* output1, float* output2) {
192   const nvinfer1::ICudaEngine& engine = context->getEngine();
193 
194   // We have two bindings: input and output.
195   ASSERT_EQ(engine.getNbBindings(), 4);
196   const int input_index1 = engine.getBindingIndex(kInputTensor1);
197   const int input_index2 = engine.getBindingIndex(kInputTensor2);
198   const int output_index1 = engine.getBindingIndex(kOutputTensor1);
199   const int output_index2 = engine.getBindingIndex(kOutputTensor2);
200 
201   // Create GPU buffers and a stream
202   std::vector<void*> buffers(engine.getNbBindings());
203   for (int i = 0; i < buffers.size(); i++) {
204     ASSERT_EQ(
205         0, cudaMalloc(&buffers[i], GetBindingSizeBytes<float>(engine, i, 1)));
206   }
207 
208   cudaStream_t stream;
209   ASSERT_EQ(0, cudaStreamCreate(&stream));
210 
211   // Copy the input to the GPU, execute the network, and copy the output back.
212   //
213   // Note that since the host buffer was not created as pinned memory, these
214   // async copies are turned into sync copies. So the following synchronization
215   // could be removed.
216   ASSERT_EQ(0, cudaMemcpyAsync(buffers[input_index1], input1, sizeof(float),
217                                cudaMemcpyHostToDevice, stream));
218   ASSERT_EQ(0, cudaMemcpyAsync(buffers[input_index2], input2, sizeof(float),
219                                cudaMemcpyHostToDevice, stream));
220   context->enqueueV2(buffers.data(), stream, nullptr);
221   ASSERT_EQ(0, cudaMemcpyAsync(output1, buffers[output_index1], sizeof(float),
222                                cudaMemcpyDeviceToHost, stream));
223   ASSERT_EQ(
224       0, cudaMemcpyAsync(output2, buffers[output_index2],
225                          GetBindingSizeBytes<int32>(engine, output_index2, 1),
226                          cudaMemcpyDeviceToHost, stream));
227   cudaStreamSynchronize(stream);
228 
229   // Release the stream and the buffers
230   for (int i = 0; i < buffers.size(); i++) {
231     ASSERT_EQ(0, cudaFree(buffers[i]));
232   }
233   cudaStreamDestroy(stream);
234 }
235 
TEST(TensorrtTest,BasicFunctions)236 TEST(TensorrtTest, BasicFunctions) {
237   // We must register the plugin creator in order to deserialize the plugin.
238 #ifdef TF_TRT_USE_EFFICIENT_NMS_PLUGIN
239   auto plugin_creator = MakeNMSPluginCreator("tftrt");
240   getPluginRegistry()->registerCreator(*plugin_creator, "tftrt");
241 #endif
242 
243   // Handle the case where the test is run on machine with no gpu available.
244   if (CHECK_NOTNULL(GPUMachineManager())->VisibleDeviceCount() <= 0) {
245     LOG(WARNING) << "No gpu device available, probably not being run on a gpu "
246                     "machine. Skipping...";
247     return;
248   }
249 
250   // Create a serialized engine
251   TrtUniquePtrType<nvinfer1::IHostMemory> model = CreateSerializedEngine();
252   // Use the model to create an engine and then an execution context.
253   Logger& logger = *Logger::GetLogger();
254   TrtUniquePtrType<nvinfer1::IRuntime> runtime(
255       nvinfer1::createInferRuntime(logger));
256   TrtUniquePtrType<nvinfer1::ICudaEngine> engine(
257       runtime->deserializeCudaEngine(model->data(), model->size(), nullptr));
258   TrtUniquePtrType<nvinfer1::IExecutionContext> context(
259       engine->createExecutionContext());
260 
261   // Execute the network.
262   float input1 = 1234;
263   float input2 = 567;
264 
265   std::vector<float> output1(
266       GetBindingSizeBytes<float>(*engine, 2, 1) / sizeof(float), 0.0f);
267 
268   std::vector<float> output2(
269       GetBindingSizeBytes<int32>(*engine, 3, 1) / sizeof(int32), 0.0f);
270 
271   ASSERT_EQ(output1.size(), 1);
272   ASSERT_EQ(output2.size(), 1);
273 
274   Execute(context.get(), &input1, &input2, output1.data(), output2.data());
275   EXPECT_EQ(output1[0], input1 + input2);
276 
277 #ifdef TF_TRT_USE_EFFICIENT_NMS_PLUGIN
278   EXPECT_EQ(output2[0], 0);
279 #else
280   EXPECT_EQ(output2[0], 667);
281 #endif  // TF_TRT_USE_EFFICIENT_NMS_PLUGIN
282 }
283 
284 }  // namespace tensorrt
285 }  // namespace tensorflow
286 
287 #endif  // GOOGLE_CUDA && GOOGLE_TENSORRT
288