1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h"
17
18 #include <algorithm>
19 #include <cmath>
20 #include <functional>
21 #include <iterator>
22 #include <memory>
23 #include <numeric>
24 #include <type_traits>
25 #include <unordered_map>
26 #include <vector>
27
28 #if GOOGLE_CUDA && GOOGLE_TENSORRT
29
30 #include <gmock/gmock.h>
31 #include <gtest/gtest.h>
32 #include "absl/algorithm/container.h"
33 #include "absl/base/call_once.h"
34 #include "absl/container/inlined_vector.h"
35 #include "absl/strings/match.h"
36 #include "absl/strings/numbers.h"
37 #include "absl/strings/str_cat.h"
38 #include "absl/strings/str_format.h"
39 #include "absl/strings/string_view.h"
40 #include "absl/types/span.h"
41 #include "third_party/eigen3/Eigen/Core"
42 #include "third_party/gpus/cuda/include/cuda.h"
43 #include "third_party/gpus/cuda/include/cuda_runtime_api.h"
44 #include "tensorflow/cc/framework/ops.h"
45 #include "tensorflow/cc/framework/scope.h"
46 #include "tensorflow/cc/ops/nn_ops_internal.h"
47 #include "tensorflow/cc/ops/standard_ops.h"
48 #include "tensorflow/compiler/tf2tensorrt/common/datavec.h"
49 #include "tensorflow/compiler/tf2tensorrt/common/utils.h"
50 #include "tensorflow/compiler/tf2tensorrt/convert/op_converter_registry.h"
51 #include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
52 #include "tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.h"
53 #include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h"
54 #include "tensorflow/compiler/tf2tensorrt/utils/trt_testutils.h"
55 #include "tensorflow/core/common_runtime/device_mgr.h"
56 #include "tensorflow/core/common_runtime/gpu/gpu_managed_allocator.h"
57 #include "tensorflow/core/common_runtime/process_function_library_runtime.h"
58 #include "tensorflow/core/framework/allocator.h"
59 #include "tensorflow/core/framework/device_factory.h"
60 #include "tensorflow/core/framework/node_def.pb.h" // NOLINT
61 #include "tensorflow/core/framework/resource_var.h"
62 #include "tensorflow/core/framework/tensor.h"
63 #include "tensorflow/core/framework/tensor.pb.h" // NOLINT
64 #include "tensorflow/core/framework/tensor_shape.h"
65 #include "tensorflow/core/framework/tensor_testutil.h"
66 #include "tensorflow/core/framework/types.h"
67 #include "tensorflow/core/grappler/costs/graph_properties.h"
68 #include "tensorflow/core/kernels/variable_ops.h"
69 #include "tensorflow/core/lib/core/status.h"
70 #include "tensorflow/core/lib/core/status_test_util.h"
71 #include "tensorflow/core/lib/strings/str_util.h"
72 #include "tensorflow/core/lib/strings/strcat.h"
73 #include "tensorflow/core/platform/protobuf.h"
74 #include "tensorflow/core/platform/status_matchers.h"
75 #include "tensorflow/core/platform/test.h"
76 #include "tensorflow/core/platform/threadpool.h"
77 #include "tensorflow/core/protobuf/config.pb.h" // NOLINT
78 #include "tensorflow/core/public/session.h"
79 #include "tensorflow/core/public/version.h"
80 #include "tensorflow/core/util/tensor_slice_reader_cache.h"
81 #include "third_party/tensorrt/NvInfer.h"
82
83 namespace tensorflow {
84 namespace tensorrt {
85
86 // TensorRT modes for testing. We define the following three modes:
87 // 1. Implicit batch mode: The tensors have static (known) input shape and the
88 // the batch dimension (first dim) is removed from the TRT tensor shape. In
89 // a loose notation: trt_shape = tf_shape[1:].
90 // 2. Explicit batch mode: static (known) input shape, but the batch dimension
91 // is part of the trt tensor shape. (trt_shape = tf_shape)
92 // 3. Dynamic shape mode allows unknown input shapes, and requires explicit
93 // batch size definition (trt_shape = tf_shape).
94 //
95 // Note that the Converter only distinguishes between two modes:
96 // - use_implicit_batch == true, this corresponds to kImplicitBatch,
97 // - use_implicit_batch == false which includes both kExplicitBatch and
98 // kDynamicShape.
99 //
100 // For the converter, the distinction between explicit batch or dynamic shape
101 // mode follows from the input tensors of the network: dynamic shape input
102 // implies dynamic shape mode, while static shape input tensors imply explicit
103 // batch mode. We want to test all these modes, therefore we define the
104 // TrtTestMode with the following three options.
105 enum class TrtTestMode {
106 kImplicitBatch = 0,
107 kExplicitBatch = 1,
108 kDynamicShape = 2
109 };
110
DebugString(const TrtTestMode mode)111 string DebugString(const TrtTestMode mode) {
112 switch (mode) {
113 case TrtTestMode::kImplicitBatch:
114 return "kImplicitBatch";
115 case TrtTestMode::kExplicitBatch:
116 return "kExplicitBatch";
117 case TrtTestMode::kDynamicShape:
118 return "kDynamicShape";
119 default:
120 return "Invalid TrtTestMode";
121 }
122 }
123
124 namespace convert {
125
126 using absl::StrCat;
127 using ::testing::ElementsAre;
128 using ::testing::ElementsAreArray;
129 using ::testing::HasSubstr;
130 using ::testing::Matcher;
131 using ::testing::PrintToString;
132
133 using ::tensorflow::testing::IsOk;
134 using ::tensorflow::testing::StatusIs;
135
136 constexpr std::array<TrtTestMode, 3> ValidTrtModes = {
137 TrtTestMode::kImplicitBatch, TrtTestMode::kExplicitBatch,
138 TrtTestMode::kDynamicShape};
139
TrtShapedWeightsEquals(const TRT_ShapedWeights & lhs,const TRT_ShapedWeights & rhs)140 bool TrtShapedWeightsEquals(const TRT_ShapedWeights& lhs,
141 const TRT_ShapedWeights& rhs) {
142 return lhs.Shape() == rhs.Shape() && lhs.TrtDType() == rhs.TrtDType() &&
143 lhs.GetPointer<int8>() == rhs.GetPointer<int8>();
144 }
145
146 template <typename T>
ValidateWeights(const TRT_ShapedWeights & weights,const std::vector<int> & expected_dims,const std::vector<T> & expected_value)147 void ValidateWeights(const TRT_ShapedWeights& weights,
148 const std::vector<int>& expected_dims,
149 const std::vector<T>& expected_value) {
150 EXPECT_EQ(weights.Shape(), DimsAdapter(expected_dims));
151 ASSERT_EQ(expected_value.size(), weights.count()) << weights.DebugString();
152 const T* actual_values = weights.GetPointer<T>();
153 for (int i = 0; i < expected_value.size(); ++i) {
154 EXPECT_EQ(expected_value[i], actual_values[i]);
155 }
156 }
157
158 // TRT >= 8.2 optimizes memory management in the builder. When all builders
159 // are destroyed, it unloads many resources. This test fixture will create and
160 // destroy hundreds of builders when run sequentially for parameterized
161 // tests. We can hold open an IBuilder in order to prevent TRT from unloading
162 // shared resources between engine builds when using TRT shared library. This
163 // greatly speeds up unit tests and is safe to do.
PreventUnloadBuilderResources()164 void PreventUnloadBuilderResources() {
165 #if IS_TRT_VERSION_GE(8, 2, 0, 0)
166 static thread_local absl::once_flag once;
167 static TrtUniquePtrType<nvinfer1::IBuilder> hold_builder = nullptr;
168 absl::call_once(
169 once,
170 [](TrtUniquePtrType<nvinfer1::IBuilder>& builder) {
171 if (!builder) {
172 builder.reset(nvinfer1::createInferBuilder(*Logger::GetLogger()));
173 }
174 },
175 hold_builder);
176 #endif
177 }
178
TEST(TRT_ShapedWeights_Test,Basic)179 TEST(TRT_ShapedWeights_Test, Basic) {
180 // Test constructor with no arguments.
181 {
182 TRT_ShapedWeights weights;
183 TRT_ShapedWeights copy(weights);
184 for (auto ptr : {&weights, ©}) {
185 nvinfer1::Weights trt_weights = ptr->GetTrtWeights();
186 EXPECT_EQ(nvinfer1::DataType::kFLOAT, trt_weights.type);
187 EXPECT_EQ(nullptr, trt_weights.values);
188 EXPECT_EQ(0, trt_weights.count);
189
190 EXPECT_EQ(nullptr, ptr->GetPointer<int8>());
191 EXPECT_EQ(0, ptr->count());
192 EXPECT_EQ(0, ptr->size_bytes());
193 }
194 }
195 // Test constructor with DataType argument.
196 {
197 TRT_ShapedWeights weights(nvinfer1::DataType::kFLOAT);
198 TRT_ShapedWeights copy(weights);
199 for (auto ptr : {&weights, ©}) {
200 nvinfer1::Weights trt_weights = ptr->GetTrtWeights();
201 EXPECT_EQ(nvinfer1::DataType::kFLOAT, trt_weights.type);
202 EXPECT_EQ(nullptr, trt_weights.values);
203 EXPECT_EQ(0, trt_weights.count);
204
205 EXPECT_EQ(nullptr, ptr->GetPointer<int8>());
206 EXPECT_EQ(0, ptr->count());
207 EXPECT_EQ(0, ptr->size_bytes());
208 }
209 }
210 // Test constructor with DataType and nvinfer1::Dims arguments.
211 {
212 TrtWeightStore store;
213 TRT_ShapedWeights weights =
214 store.GetTempWeights(nvinfer1::DataType::kFLOAT, CreateDims({2, 5}))
215 .ValueOrDie();
216 TRT_ShapedWeights copy(weights);
217 for (auto ptr : {&weights, ©}) {
218 nvinfer1::Weights trt_weights = ptr->GetTrtWeights();
219 EXPECT_EQ(nvinfer1::DataType::kFLOAT, trt_weights.type);
220 EXPECT_NE(nullptr, trt_weights.values);
221 EXPECT_EQ(10, trt_weights.count);
222
223 EXPECT_EQ(trt_weights.values, ptr->GetPointer<int8>());
224 EXPECT_EQ(10, ptr->count());
225 EXPECT_EQ(40, ptr->size_bytes());
226 }
227 // Test that it doesn't copy the underlying buffer.
228 EXPECT_EQ(weights.GetPointer<int8>(), copy.GetPointer<int8>());
229 }
230 }
231
TEST(TRT_TensorOrWeights_Test,Basic)232 TEST(TRT_TensorOrWeights_Test, Basic) {
233 // Test constructor with no arguments.
234 {
235 TRT_TensorOrWeights tw;
236 TRT_TensorOrWeights copy(tw);
237 TRT_TensorOrWeights assigned;
238 assigned = tw;
239 for (auto ptr : {&tw, ©, &assigned}) {
240 EXPECT_EQ(false, ptr->is_tensor());
241 EXPECT_EQ(false, ptr->is_weights());
242 EXPECT_EQ(-1, ptr->batch_size());
243 }
244 }
245
246 // Test constructor with ITensor and batch size argument.
247 {
248 nvinfer1::Dims dims;
249 dims.nbDims = 1;
250 dims.d[0] = 1;
251 ITensorProxyPtr itensor(dims);
252 TRT_TensorOrWeights tw(itensor);
253 TRT_TensorOrWeights tw1(itensor, /*batch_size=*/1);
254
255 for (auto original_ptr : {&tw, &tw1}) {
256 TRT_TensorOrWeights copy(*original_ptr);
257 TRT_TensorOrWeights assigned;
258 assigned = *original_ptr;
259
260 for (auto ptr : {original_ptr, ©, &assigned}) {
261 ASSERT_TRUE(ptr->is_tensor());
262 EXPECT_EQ(false, ptr->is_weights());
263 if (original_ptr == &tw) {
264 EXPECT_EQ(-1, ptr->batch_size());
265 } else {
266 EXPECT_EQ(1, ptr->batch_size());
267 }
268 EXPECT_EQ(itensor->simple_tensor(), ptr->tensor()->simple_tensor());
269 EXPECT_THAT(ptr->GetTrtDims(), DimsAreArray({1}));
270 }
271 }
272 }
273 // Test constructor which creates and owns an ITensor.
274 {
275 nvinfer1::Dims dims;
276 dims.nbDims = 1;
277 dims.d[0] = 1;
278 TRT_TensorOrWeights tw(nvinfer1::DataType::kFLOAT, dims, /*batch_size=*/1);
279 TRT_TensorOrWeights copy(tw);
280 TRT_TensorOrWeights assigned;
281 assigned = tw;
282
283 for (auto ptr : {&tw, ©, &assigned}) {
284 ASSERT_TRUE(ptr->is_tensor());
285 EXPECT_EQ(false, ptr->is_weights());
286 EXPECT_EQ(1, ptr->batch_size());
287 EXPECT_NE(nullptr, ptr->tensor()->simple_tensor());
288 EXPECT_THAT(ptr->GetTrtDims(), DimsAreArray({1}));
289 }
290 }
291 // Test constructor with TRT_ShapedWeights argument.
292 {
293 TRT_ShapedWeights weights;
294 TRT_TensorOrWeights tw(weights);
295 TRT_TensorOrWeights copy(tw);
296 TRT_TensorOrWeights assigned;
297 assigned = tw;
298 for (auto ptr : {&tw, ©, &assigned}) {
299 EXPECT_EQ(false, ptr->is_tensor());
300 EXPECT_EQ(true, ptr->is_weights());
301 EXPECT_TRUE(TrtShapedWeightsEquals(weights, ptr->weights()));
302 std::vector<int> empty_dims;
303 EXPECT_THAT(ptr->GetTrtDims(), DimsAreArray(empty_dims));
304 }
305 }
306 }
307
308 class ValidatorTest : public ::testing::Test {
309 public:
ValidatorTest()310 ValidatorTest() { PreventUnloadBuilderResources(); }
ConvertToTensorOrWeights(const Scope & scope,const Node * node,int output_port,TRT_TensorOrWeights * tensor_or_weights)311 Status ConvertToTensorOrWeights(const Scope& scope, const Node* node,
312 int output_port,
313 TRT_TensorOrWeights* tensor_or_weights) {
314 grappler::GrapplerItem item;
315 TF_EXPECT_OK(scope.ToGraphDef(&item.graph));
316 grappler::GraphProperties graph_properties(item);
317 TF_EXPECT_OK(graph_properties.InferStatically(true));
318
319 TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32,
320 /*use_calibration=*/false,
321 /*use_implicit_batch=*/true,
322 /*use_explicit_precision=*/false);
323 return validator.ConvertToTensorOrWeights(node->def(), output_port,
324 tensor_or_weights);
325 }
326 };
327
TEST_F(ValidatorTest,ConvertToTensorOrWeights)328 TEST_F(ValidatorTest, ConvertToTensorOrWeights) {
329 // Convert Const.
330 {
331 Scope s = Scope::NewRootScope();
332 auto node =
333 ops::Const(s.WithOpName("my_const"), {1.0f, 2.0f}, TensorShape({2}));
334 TRT_TensorOrWeights output;
335 EXPECT_THAT(ConvertToTensorOrWeights(s, node.op().node(),
336 /*output_port=*/0, &output),
337 IsOk());
338 ValidateWeights<float>(output.weights(), {2}, {1.0, 2.0});
339 }
340
341 // Helper method to run ConvertToTensorOrWeights() with predefined parameters.
342 auto convert_to_tensor_or_weights = [this](const std::vector<int64_t>& dims,
343 TRT_TensorOrWeights* output) {
344 Scope s = Scope::NewRootScope();
345 const auto attrs = ops::Placeholder::Shape(PartialTensorShape{dims});
346 auto feed = ops::Placeholder(s.WithOpName("feed"), DT_FLOAT, attrs);
347 auto add = ops::Add(s.WithOpName("add"), feed, feed);
348 return this->ConvertToTensorOrWeights(s, add.operation.node(),
349 /*output_port=*/0, output);
350 };
351 // Convert non-Const with #dims > nvinfer1::Dims::MAX_DIMS+1.
352 {
353 TRT_TensorOrWeights output;
354 EXPECT_THAT(
355 convert_to_tensor_or_weights(
356 std::vector<int64_t>(nvinfer1::Dims::MAX_DIMS + 2, 1), &output),
357 StatusIs(error::OUT_OF_RANGE,
358 HasSubstr("Input tensor rank is greater than 9")));
359 }
360 // Convert non-Const with #dims < 1.
361 {
362 TRT_TensorOrWeights output;
363 EXPECT_THAT(convert_to_tensor_or_weights({}, &output),
364 StatusIs(error::INVALID_ARGUMENT,
365 HasSubstr("Scalar input tensor is not supported since "
366 "the first dimension "
367 "is treated as batch dimension by TRT")));
368 }
369 // Convert non-Const. We test the case where the non-batch dimension is
370 // unknown as well, to make sure the validator allows that.
371 for (const int32 non_batch_dim : {-1, 2}) {
372 const int32 batch_size = 12;
373 TRT_TensorOrWeights output;
374 EXPECT_THAT(
375 convert_to_tensor_or_weights({batch_size, non_batch_dim}, &output),
376 IsOk());
377 ASSERT_TRUE(output.is_tensor());
378 EXPECT_EQ(batch_size, output.batch_size());
379 EXPECT_NE(nullptr, output.tensor()->simple_tensor());
380 EXPECT_THAT(output.GetTrtDims(), DimsAreArray({non_batch_dim}));
381 }
382 }
383
TEST_F(ValidatorTest,IsTensorRTCandidate_Basics)384 TEST_F(ValidatorTest, IsTensorRTCandidate_Basics) {
385 Scope s = Scope::NewRootScope();
386 auto input =
387 ops::Const(s.WithOpName("const"), {1.0f, 2.0f}, TensorShape({2}));
388 auto add = ops::Add(s.WithOpName("add"), input, input);
389 const Node* add_node = add.operation.node();
390
391 grappler::GrapplerItem item;
392 TF_EXPECT_OK(s.ToGraphDef(&item.graph));
393 grappler::GraphProperties graph_properties(item);
394 TF_EXPECT_OK(graph_properties.InferStatically(true));
395 TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32,
396 /*use_calibration=*/false,
397 /*use_implicit_batch=*/true,
398 /*use_explicit_precision=*/false);
399
400 // Override the Add converter.
401 bool start_conversion = false;
402 bool should_fail = false;
403 auto op_converter = [&start_conversion,
404 &should_fail](OpConverterParams* params) -> Status {
405 if (should_fail) return errors::InvalidArgument("");
406 if (!params->validation_only) start_conversion = true;
407 return Status::OK();
408 };
409
410 // Validator not registered.
411 auto original_op_converter = GetOpConverterRegistry()->LookUp("Add");
412 ASSERT_TRUE(original_op_converter.ok());
413 GetOpConverterRegistry()->Clear("Add");
414 EXPECT_THAT(validator.IsTensorRTCandidate(add_node),
415 StatusIs(error::UNIMPLEMENTED,
416 HasSubstr("Op type Add is not supported.")));
417 GetOpConverterRegistry()->Register("Add", kDefaultConverterPriority + 1,
418 op_converter);
419 TF_EXPECT_OK(validator.IsTensorRTCandidate(add_node));
420 EXPECT_EQ(false, start_conversion);
421
422 // Let the converter return error.
423 should_fail = true;
424 EXPECT_THAT(validator.IsTensorRTCandidate(add_node),
425 StatusIs(error::INVALID_ARGUMENT));
426 GetOpConverterRegistry()->Clear("Add");
427 GetOpConverterRegistry()->Register("Add", kDefaultConverterPriority,
428 *original_op_converter);
429 }
430
TEST(TrtNodeValidator,IsTensorRTCandidate)431 TEST(TrtNodeValidator, IsTensorRTCandidate) {
432 // Create a graph containing both TRT-compatible and TRT-incompatible nodes
433 // and use it to test TrtNodeValidator::IsTensorRTCandidate().
434 const std::vector<int32> input_shape_array{2, 2};
435 TensorShape input_shape;
436 TF_EXPECT_OK(TensorShapeUtils::MakeShape(input_shape_array, &input_shape));
437
438 Scope s = Scope::NewRootScope();
439 ops::Placeholder::Attrs feed_attrs;
440 TF_EXPECT_OK(
441 TensorShapeUtils::MakeShape(input_shape_array, &feed_attrs.shape_));
442
443 // Compatible input.
444 auto feed = ops::Placeholder(s.WithOpName("feed"), DT_FLOAT, feed_attrs);
445 auto const_1 = ops::Const(s.WithOpName("const_1"), 1.0f, input_shape);
446
447 // Compatible MatMul.
448 auto matmul = ops::MatMul(s.WithOpName("matmul"), feed, const_1);
449
450 // Incompatible MatMul.
451 ops::MatMul::Attrs matmul_attrs;
452 matmul_attrs.transpose_a_ = true;
453 auto incompatible_matmul = ops::MatMul(s.WithOpName("incompatible_matmul"),
454 feed, const_1, matmul_attrs);
455
456 // Unsupported op.
457 auto unsupported_op = ops::Erfc(s.WithOpName("sin"), feed);
458
459 // Incompatible input.
460 auto incompatible_feed = ops::Placeholder(s.WithOpName("feed"), DT_DOUBLE);
461 auto const_2 = ops::Const(s.WithOpName("const_2"), 1.0, input_shape);
462 // Compatible op with incompatible input.
463 auto matmul_with_incompatible_input =
464 ops::MatMul(s.WithOpName("matmul_with_incompatible_input"),
465 incompatible_feed, const_2);
466
467 // Quantize ops.
468 auto quantize_attrs = ops::FakeQuantWithMinMaxArgs::Min(-6.0f).Max(6.0f);
469 auto quantize = ops::FakeQuantWithMinMaxArgs(s.WithOpName("quantize"), feed,
470 quantize_attrs);
471
472 // Get GrapplerItem and GraphProperties.
473 grappler::GrapplerItem item;
474 TF_EXPECT_OK(s.ToGraphDef(&item.graph));
475 Tensor feed_tensor(DT_FLOAT, input_shape);
476 item.feed.push_back(std::make_pair("feed", feed_tensor));
477 grappler::GraphProperties graph_properties(item);
478 TF_EXPECT_OK(graph_properties.InferStatically(true));
479
480 for (const TrtPrecisionMode precision_mode :
481 {TrtPrecisionMode::FP32, TrtPrecisionMode::INT8}) {
482 TrtNodeValidator validator(graph_properties, precision_mode,
483 /*use_calibration=*/false,
484 /*use_implicit_batch=*/true,
485 /*use_explicit_precision=*/false);
486 TF_EXPECT_OK(validator.IsTensorRTCandidate(matmul.operation.node()));
487 EXPECT_THAT(
488 validator.IsTensorRTCandidate(incompatible_matmul.operation.node()),
489 StatusIs(error::INVALID_ARGUMENT,
490 HasSubstr("MatMul with 2D tensors requires explicit batch "
491 "mode, or that tensor A "
492 "is not transposed and B is a constant tensor.")));
493 EXPECT_THAT(validator.IsTensorRTCandidate(unsupported_op.operation.node()),
494 StatusIs(error::UNIMPLEMENTED,
495 HasSubstr("Op type Erfc is not supported")));
496 EXPECT_THAT(validator.IsTensorRTCandidate(
497 matmul_with_incompatible_input.operation.node()),
498 StatusIs(error::INTERNAL,
499 HasSubstr("Failed to convert at least one input to a "
500 "TRT_TensorOrWeights:")));
501 if (precision_mode == TrtPrecisionMode::INT8) {
502 TF_EXPECT_OK(validator.IsTensorRTCandidate(quantize.operation.node()));
503 } else {
504 EXPECT_THAT(
505 validator.IsTensorRTCandidate(quantize.operation.node()),
506 StatusIs(
507 error::UNIMPLEMENTED,
508 HasSubstr("Op type FakeQuantWithMinMaxArgs is not supported")));
509 }
510 }
511 }
512
513 class ConverterTest : public ::testing::Test {
514 public:
ConverterTest()515 ConverterTest() {
516 PreventUnloadBuilderResources();
517 Reset();
518 }
519
Reset()520 void Reset() {
521 GetOpConverterRegistry()->Clear("MyOp");
522 GetOpConverterRegistry()->Clear("DummyOp");
523 converter_ =
524 std::move(Converter::Create(TrtPrecisionMode::FP32,
525 /*use_calibration=*/false, &logger_,
526 /*use_implicit_batch=*/true,
527 /*engine_name=*/"TRTEngineOp_000_000",
528 /*use_explicit_precision=*/false)
529 .ValueOrDie());
530 weight_store_ = &converter_->weight_store_;
531 }
532
533 // TODO(cbate): These should be removed or changed to public per black-box
534 // testing principle.
535 // Below we expose private methods of Converter for testing.
MaybeUpdateBatchSize(int batch_size)536 Status MaybeUpdateBatchSize(int batch_size) {
537 return converter_->MaybeUpdateBatchSize(batch_size);
538 }
539
AddTensorOrWeights(const string & name,TRT_TensorOrWeights input)540 Status AddTensorOrWeights(const string& name, TRT_TensorOrWeights input) {
541 return converter_->AddTensorOrWeights(name, input);
542 }
543
GetTensorOrWeights(const string & name,TRT_TensorOrWeights * output)544 Status GetTensorOrWeights(const string& name, TRT_TensorOrWeights* output) {
545 return converter_->GetTensorOrWeights(name, output);
546 }
547
GetInputs(const NodeDef & node_def,std::vector<TRT_TensorOrWeights> * inputs) const548 Status GetInputs(const NodeDef& node_def,
549 std::vector<TRT_TensorOrWeights>* inputs) const {
550 return converter_->GetInputs(node_def, inputs);
551 }
552
GetWeightRange(const TRT_ShapedWeights & weights,float * out_min,float * out_max) const553 Status GetWeightRange(const TRT_ShapedWeights& weights, float* out_min,
554 float* out_max) const {
555 return converter_->GetWeightRange(weights, out_min, out_max);
556 }
557
batch_size() const558 int batch_size() const { return converter_->batch_size_; }
559
quantization_ranges_proxy()560 std::unordered_map<ITensorProxyPtr*, float>& quantization_ranges_proxy() {
561 return converter_->quantization_ranges_proxy_;
562 }
563
quantization_ranges()564 std::unordered_map<nvinfer1::ITensor*, float>& quantization_ranges() {
565 return converter_->quantization_ranges_;
566 }
567
568 private:
569 Logger& logger_ = *Logger::GetLogger();
570
571 protected:
572 std::unique_ptr<Converter> converter_;
573 TrtWeightStore* weight_store_;
574 };
575
TEST_F(ConverterTest,ConvertNode)576 TEST_F(ConverterTest, ConvertNode) {
577 ITensorProxyPtr output_tensors[2];
578 auto op_converter = [&output_tensors](OpConverterParams* params) -> Status {
579 nvinfer1::Dims dims = params->inputs[0].tensor()->getDimensions();
580 for (int i = 0; i < 2; ++i) {
581 dims.d[0] += 1;
582 output_tensors[i]->setDimensions(dims);
583 params->outputs->push_back(TRT_TensorOrWeights(output_tensors[i]));
584 }
585 return Status::OK();
586 };
587 NodeDef node_def = MakeNodeDef("my_op", "MyOp", {"my_input"});
588
589 TF_ASSERT_OK(converter_->AddInputTensor(
590 "my_input", nvinfer1::DataType::kFLOAT, CreateDims({123}), 1));
591
592 // Converter not registered.
593 EXPECT_THAT(
594 converter_->ConvertNode(node_def),
595 StatusIs(error::NOT_FOUND, HasSubstr("No converter for op MyOp")));
596
597 // Register the converter and retry.
598 GetOpConverterRegistry()->Register("MyOp", kDefaultConverterPriority,
599 op_converter);
600 TF_ASSERT_OK(converter_->ConvertNode(node_def));
601
602 TRT_TensorOrWeights actual_output_1;
603 TF_EXPECT_OK(GetTensorOrWeights("my_op", &actual_output_1));
604 EXPECT_EQ(output_tensors[0]->simple_tensor(),
605 actual_output_1.tensor()->simple_tensor());
606 EXPECT_EQ(124, actual_output_1.tensor()->getDimensions().d[0]);
607
608 TRT_TensorOrWeights actual_output_2;
609 TF_EXPECT_OK(GetTensorOrWeights("my_op:1", &actual_output_2));
610 EXPECT_EQ(output_tensors[1]->simple_tensor(),
611 actual_output_2.tensor()->simple_tensor());
612 EXPECT_EQ(125, actual_output_2.tensor()->getDimensions().d[0]);
613
614 EXPECT_THAT(converter_->network(), LayerNamesNonEmpty());
615 }
616
TEST_F(ConverterTest,AddAndGetInputs)617 TEST_F(ConverterTest, AddAndGetInputs) {
618 NodeDef node_def;
619 node_def.add_input("^control_input");
620 node_def.add_input("input");
621 node_def.add_input("input:0");
622 node_def.add_input("input:1");
623 node_def.add_input("weird_input:2:3:4:0");
624
625 TF_EXPECT_OK(converter_->AddInputTensor("input", nvinfer1::DataType::kFLOAT,
626 CreateDims({1}), 1));
627 TF_EXPECT_OK(converter_->AddInputTensor("input:1", nvinfer1::DataType::kINT32,
628 CreateDims({2, 3}), 1));
629 TF_EXPECT_OK(converter_->AddInputTensor(
630 "weird_input:2:3:4", nvinfer1::DataType::kHALF, CreateDims({5, 3}), 1));
631
632 std::vector<TRT_TensorOrWeights> inputs;
633 TF_EXPECT_OK(GetInputs(node_def, &inputs));
634
635 EXPECT_EQ(4, inputs.size());
636 EXPECT_EQ(inputs[0].tensor()->trt_tensor(), inputs[1].tensor()->trt_tensor());
637
638 EXPECT_EQ(nvinfer1::DataType::kFLOAT, inputs[0].tensor()->getType());
639 EXPECT_EQ(nvinfer1::DataType::kINT32, inputs[2].tensor()->getType());
640 EXPECT_EQ(nvinfer1::DataType::kHALF, inputs[3].tensor()->getType());
641 EXPECT_THAT(inputs[0].tensor()->getDimensions(), DimsAreArray({1}));
642 EXPECT_THAT(inputs[2].tensor()->getDimensions(), DimsAreArray({2, 3}));
643 EXPECT_THAT(inputs[3].tensor()->getDimensions(), DimsAreArray({5, 3}));
644
645 EXPECT_THAT(converter_->network(), LayerNamesNonEmpty());
646 }
647
TEST_F(ConverterTest,RenameAndMarkOutputTensors)648 TEST_F(ConverterTest, RenameAndMarkOutputTensors) {
649 // Test that the tensor are actually named and marked as output after
650 // Converter::RenameAndMarkOutputTensors() is called.
651
652 // Register a custom converter which shuffles the input. We use it to build a
653 // TRT network whose output will be later marked.
654 std::vector<ITensorProxyPtr> output_tensors;
655 auto op_converter = [&output_tensors](OpConverterParams* params) -> Status {
656 nvinfer1::Permutation perm;
657 perm.order[0] = 1;
658 perm.order[1] = 0;
659 for (int i = 0; i < 2; ++i) {
660 ITensorProxyPtr input_tensor = params->inputs[0].tensor();
661 nvinfer1::IShuffleLayer* layer =
662 params->converter->network()->addShuffle(*input_tensor->trt_tensor());
663 layer->setFirstTranspose(perm);
664 ITensorProxyPtr output_tensor = layer->getOutput(0);
665 params->outputs->emplace_back(output_tensor);
666 output_tensors.push_back(output_tensor);
667 }
668 TRT_ShapedWeights output_weights(nvinfer1::DataType::kFLOAT);
669 params->outputs->emplace_back(output_weights);
670 return Status::OK();
671 };
672 GetOpConverterRegistry()->Register("MyOp", kDefaultConverterPriority,
673 op_converter);
674
675 // Run the conversion.
676 NodeDef node_def = MakeNodeDef("my_op", "MyOp", {"my_input"});
677 TF_EXPECT_OK(converter_->AddInputTensor(
678 "my_input", nvinfer1::DataType::kFLOAT, CreateDims({1, 2}), 1));
679 TF_EXPECT_OK(converter_->ConvertNode(node_def));
680
681 // Mark a weight as output, should fail.
682 EXPECT_THAT(
683 converter_->RenameAndMarkOutputTensors({{"my_op:2", "my_output"}}),
684 StatusIs(error::INVALID_ARGUMENT,
685 HasSubstr("Output my_op:2 is weights not tensor")));
686
687 // Mark tensors as output, should pass.
688 TF_EXPECT_OK(converter_->RenameAndMarkOutputTensors(
689 {{"my_op", "my_output"}, {"my_op:1", "my_output_1"}}));
690 EXPECT_EQ(2, output_tensors.size());
691 for (auto output_tensor : output_tensors) {
692 EXPECT_THAT(output_tensor->getDimensions(), DimsAreArray({2, 1}));
693 }
694 EXPECT_EQ("my_output", string(output_tensors[0]->getName()));
695 EXPECT_EQ("my_output_1", string(output_tensors[1]->getName()));
696
697 EXPECT_THAT(converter_->network(), LayerNamesNonEmpty());
698 }
699
TEST_F(ConverterTest,TransposeTensor)700 TEST_F(ConverterTest, TransposeTensor) {
701 ITensorProxyPtr input_tensor = converter_->network()->addInput(
702 "", nvinfer1::DataType::kFLOAT, CreateDims({2, 3, 5}));
703 ITensorProxyPtr output_tensor = nullptr;
704 NodeDef dummy_node_def = MakeNodeDef("dummy_op", "DummyOp", {});
705 // Rank doesn't match.
706 EXPECT_THAT(converter_->TransposeTensor(input_tensor, {0, 1}, &output_tensor,
707 dummy_node_def, "sub1"),
708 StatusIs(error::INVALID_ARGUMENT,
709 HasSubstr("Rank of perm for transpose does not match "
710 "with that of the input")));
711
712 // Transpose at batch dimension.
713 EXPECT_THAT(
714 converter_->TransposeTensor(input_tensor, {1, 0, 2, 3}, &output_tensor,
715 dummy_node_def, "sub2"),
716 StatusIs(error::UNIMPLEMENTED,
717 HasSubstr("Transpose at batch dimension is not supported.")));
718
719 // OK.
720 TF_EXPECT_OK(converter_->TransposeTensor(
721 input_tensor, {0, 3, 1, 2}, &output_tensor, dummy_node_def, "sub3"));
722 EXPECT_THAT(output_tensor->getDimensions(), DimsAreArray({5, 2, 3}));
723 EXPECT_THAT(
724 converter_->network(),
725 LayerNamesAreArray({"TRTEngineOp_000_000/dummy_op-sub3:SHUFFLE"}));
726 }
727
TestPrepareTensorForShape(const std::vector<int> & input_dims,const std::vector<int> & reshape_dims,const std::vector<int> & expected_tensor_dims,bool input_is_tensor,Converter * converter,TrtWeightStore * weight_store,error::Code expected_code=error::OK,const char * expected_error_msg_substr=nullptr)728 void TestPrepareTensorForShape(
729 const std::vector<int>& input_dims, const std::vector<int>& reshape_dims,
730 const std::vector<int>& expected_tensor_dims, bool input_is_tensor,
731 Converter* converter, TrtWeightStore* weight_store,
732 error::Code expected_code = error::OK,
733 const char* expected_error_msg_substr = nullptr) {
734 TRT_TensorOrWeights input;
735 if (input_is_tensor) {
736 input = TRT_TensorOrWeights(converter->network()->addInput(
737 "", nvinfer1::DataType::kFLOAT, CreateDims(input_dims)));
738 } else {
739 input = TRT_TensorOrWeights(
740 weight_store
741 ->GetTempWeights(nvinfer1::DataType::kFLOAT, CreateDims(input_dims))
742 .ValueOrDie());
743 }
744 ITensorProxyPtr output_tensor = nullptr;
745
746 NodeDef dummy_node_def = MakeNodeDef("dummy_op", "DummyOp", {});
747 for (bool validation_only : {false, true}) {
748 const Status status =
749 PrepareTensorForShape(converter, input, DimsAdapter(reshape_dims),
750 validation_only, &output_tensor, dummy_node_def);
751 if (expected_code == error::OK) {
752 TF_EXPECT_OK(status);
753 if (validation_only) {
754 EXPECT_EQ(nullptr, *output_tensor);
755 } else {
756 EXPECT_THAT(output_tensor->getDimensions(),
757 DimsAreArray(expected_tensor_dims));
758 }
759 } else {
760 EXPECT_THAT(status, StatusIs(expected_code,
761 HasSubstr(expected_error_msg_substr)));
762 }
763 }
764 }
765
TEST_F(ConverterTest,PrepareTensorForShape)766 TEST_F(ConverterTest, PrepareTensorForShape) {
767 for (bool input_is_tensor : {true, false}) {
768 // Shape size doesn't match.
769 Reset();
770 TestPrepareTensorForShape({2, 3, 5}, {2, 3, 6}, {}, input_is_tensor,
771 converter_.get(), weight_store_,
772 error::INVALID_ARGUMENT, "Incompatible shapes");
773
774 // Regular shape.
775 Reset();
776 TestPrepareTensorForShape({2, 3, 5}, {10, 3}, {10, 3}, input_is_tensor,
777 converter_.get(), weight_store_);
778
779 // Reshape to zero rank.
780 Reset();
781 TestPrepareTensorForShape({1, 1}, {}, {}, input_is_tensor, converter_.get(),
782 weight_store_);
783 }
784
785 // Tensor input with zero rank.
786 Reset();
787 TestPrepareTensorForShape({}, {1, 1}, {1, 1}, /*input_is_tensor=*/true,
788 converter_.get(), weight_store_);
789
790 // TODO(aaroey): we should check the case where uninferred dimensions are
791 // not an exact divisor of input dim ensions, e.g. for dims {-1, 7}.
792
793 // Infer tensor shape, ok.
794 Reset();
795 TestPrepareTensorForShape({2, 3, 5}, {-1, 2}, {15, 2},
796 /*input_is_tensor=*/true, converter_.get(),
797 weight_store_);
798
799 // Infer weight shape, should fail.
800 Reset();
801 TestPrepareTensorForShape({2, 3, 5}, {-1, 2}, {15, 2},
802 /*input_is_tensor=*/false, converter_.get(),
803 weight_store_, error::INVALID_ARGUMENT,
804 "Shape is not fully defined");
805
806 EXPECT_THAT(converter_->network(), LayerNamesNonEmpty());
807 }
808
TEST_F(ConverterTest,MaybeUpdateBatchSize)809 TEST_F(ConverterTest, MaybeUpdateBatchSize) {
810 EXPECT_EQ(-1, batch_size());
811
812 TF_EXPECT_OK(MaybeUpdateBatchSize(-1));
813 EXPECT_EQ(-1, batch_size());
814
815 TF_EXPECT_OK(MaybeUpdateBatchSize(123));
816 EXPECT_EQ(123, batch_size());
817
818 TF_EXPECT_OK(MaybeUpdateBatchSize(123));
819 EXPECT_EQ(123, batch_size());
820
821 TF_EXPECT_OK(MaybeUpdateBatchSize(-1));
822 EXPECT_EQ(123, batch_size());
823
824 EXPECT_THAT(
825 MaybeUpdateBatchSize(124),
826 StatusIs(error::INVALID_ARGUMENT,
827 HasSubstr(
828 "Provided batch size does not match converter batch size")));
829 }
830
TEST_F(ConverterTest,AddAndGetTensorOrWeights)831 TEST_F(ConverterTest, AddAndGetTensorOrWeights) {
832 // Add a tensor.
833 ITensorProxyPtr simple_tensor;
834 TRT_TensorOrWeights tensor(simple_tensor);
835 EXPECT_EQ(-1, tensor.batch_size());
836 TF_EXPECT_OK(MaybeUpdateBatchSize(123));
837 TF_EXPECT_OK(AddTensorOrWeights("my_tensor", tensor));
838
839 // Get the added tensor.
840 TRT_TensorOrWeights added_tensor;
841 TF_EXPECT_OK(GetTensorOrWeights("my_tensor", &added_tensor));
842 EXPECT_EQ(123, added_tensor.batch_size());
843
844 // Add the same tensor again.
845 EXPECT_THAT(AddTensorOrWeights("my_tensor", tensor),
846 StatusIs(error::ALREADY_EXISTS,
847 HasSubstr("tensor/weights my_tensor already exist")));
848 }
849
850 template <typename T>
TestGetWeightRange(ConverterTest * test,TrtWeightStore * weight_store)851 void TestGetWeightRange(ConverterTest* test, TrtWeightStore* weight_store) {
852 nvinfer1::DataType trt_type;
853 TF_ASSERT_OK(TfTypeToTrtType(DataTypeToEnum<T>::v(), &trt_type));
854 TRT_ShapedWeights weights =
855 weight_store->GetTempWeights(trt_type, CreateDims({2, 3})).ValueOrDie();
856 const std::vector<T> values = {T(3), T(1), T(2), T(6), T(5), T(4)};
857 absl::c_copy(values, weights.GetPointer<T>());
858 float out_min = 0.0f;
859 float out_max = 0.0f;
860 TF_EXPECT_OK(test->GetWeightRange(weights, &out_min, &out_max));
861 EXPECT_EQ(1.0f, out_min);
862 EXPECT_EQ(6.0f, out_max);
863 }
864
TEST_F(ConverterTest,GetWeightRange)865 TEST_F(ConverterTest, GetWeightRange) {
866 TestGetWeightRange<float>(this, weight_store_);
867 TestGetWeightRange<Eigen::half>(this, weight_store_);
868 TestGetWeightRange<int32>(this, weight_store_);
869 }
870
TEST_F(ConverterTest,ProvideQuantizationRange)871 TEST_F(ConverterTest, ProvideQuantizationRange) {
872 ITensorProxyPtr simple_tensor;
873 // Asymmetric range
874 converter_->ProvideQuantizationRange(&simple_tensor, 0.0f, 6.0f);
875 EXPECT_EQ(6.0f, quantization_ranges_proxy()[&simple_tensor]);
876 converter_->ProvideQuantizationRange(&simple_tensor, 1.0f, 6.0f);
877 EXPECT_EQ(6.0f, quantization_ranges_proxy()[&simple_tensor]);
878 converter_->ProvideQuantizationRange(&simple_tensor, -8.0f, 6.0f);
879 EXPECT_EQ(8.0f, quantization_ranges_proxy()[&simple_tensor]);
880 converter_->ProvideQuantizationRange(&simple_tensor, -8.123f, -6.123f);
881 EXPECT_EQ(8.123f, quantization_ranges_proxy()[&simple_tensor]);
882 // Symmetric range
883 converter_->ProvideQuantizationRange(&simple_tensor, -6.123f, 6.123f);
884 EXPECT_EQ(6.123f, quantization_ranges_proxy()[&simple_tensor]);
885
886 EXPECT_THAT(converter_->network(), LayerNamesNonEmpty());
887 }
888
TEST_F(ConverterTest,MaybeApplyQuantizationRanges)889 TEST_F(ConverterTest, MaybeApplyQuantizationRanges) {
890 ITensorProxyPtr input;
891 ITensorProxyPtr not_infer;
892 Logger& logger = *Logger::GetLogger();
893 auto int8_converter = Converter::Create(TrtPrecisionMode::INT8,
894 /*use_calibration=*/true, &logger,
895 /*use_implicit_batch=*/true,
896 /*engine_name=*/"")
897 .ValueOrDie();
898 int8_converter->ProvideQuantizationRange(&input, -5.0f, 5.0f);
899 int8_converter->ProvideQuantizationRange(¬_infer, -100.0f, 100.0f);
900
901 int8_converter->MaybeApplyQuantizationRanges();
902 EXPECT_EQ(input->getDynamicRangeMax(), 5.0f);
903 EXPECT_EQ(not_infer->getDynamicRangeMax(), 100.0f);
904
905 EXPECT_THAT(int8_converter->network(), LayerNamesNonEmpty());
906 }
907
TEST_F(ConverterTest,GetTrtBroadcastShape)908 TEST_F(ConverterTest, GetTrtBroadcastShape) {
909 const bool kIsTensor = true;
910 const bool kIsNotTensor = false;
911 auto symmetric_test = [this](const std::vector<int>& operand_1_shape,
912 const std::vector<int>& operand_2_shape,
913 const bool operand_1_is_tensor,
914 const bool operand_2_is_tensor,
915 const std::vector<int>& expected_operand_1_shape,
916 const std::vector<int>& expected_operand_2_shape,
917 error::Code expected_code = error::OK,
918 const char* expected_error_msg_substr = "",
919 const int operand_1_batch_size = -1,
920 const int operand_2_batch_size = -1) {
921 auto create_tensor_or_weights = [](const std::vector<int>& shape,
922 bool is_tensor, int batch_size = -1) {
923 if (is_tensor) {
924 return TRT_TensorOrWeights(nvinfer1::DataType::kFLOAT,
925 CreateDims(shape), batch_size);
926 }
927 TRT_ShapedWeights weights;
928 weights.Shape() = CreateDims(shape);
929 return TRT_TensorOrWeights(weights);
930 };
931
932 nvinfer1::Dims operand_1_new_dims, operand_2_new_dims;
933 TRT_TensorOrWeights operand_1 = create_tensor_or_weights(
934 operand_1_shape, operand_1_is_tensor, operand_1_batch_size);
935 TRT_TensorOrWeights operand_2 = create_tensor_or_weights(
936 operand_2_shape, operand_2_is_tensor, operand_2_batch_size);
937
938 // operand_1 broadcast operand_2
939 EXPECT_THAT(
940 GetTrtBroadcastShape(operand_1, operand_2, /*check_feasibility=*/true,
941 /*use_implicit_batch=*/true, &operand_1_new_dims,
942 &operand_2_new_dims),
943 StatusIs(expected_code, HasSubstr(expected_error_msg_substr)));
944 if (expected_code == error::OK) {
945 EXPECT_THAT(operand_1_new_dims, DimsAreArray(expected_operand_1_shape));
946 EXPECT_THAT(operand_2_new_dims, DimsAreArray(expected_operand_2_shape));
947 }
948 // operand_2 broadcast operand_1
949 EXPECT_THAT(
950 GetTrtBroadcastShape(operand_2, operand_1, /*check_feasibility=*/true,
951 /*use_implicit_batch=*/true, &operand_2_new_dims,
952 &operand_1_new_dims),
953 StatusIs(expected_code, HasSubstr(expected_error_msg_substr)));
954 if (expected_code == error::OK) {
955 EXPECT_THAT(operand_1_new_dims, DimsAreArray(expected_operand_1_shape));
956 EXPECT_THAT(operand_2_new_dims, DimsAreArray(expected_operand_2_shape));
957 }
958 };
959
960 // Both inputs are weights.
961 symmetric_test(
962 {1}, {1}, kIsNotTensor, kIsNotTensor, {}, {}, error::INVALID_ARGUMENT,
963 "Broadcasting requires at least one of the operands be tensors");
964
965 // One tensor and one weights.
966 symmetric_test({1, 1, 1}, {2}, kIsTensor, kIsNotTensor, {1, 1, 1}, {1, 1, 2});
967 symmetric_test({1, 1, 2}, {2}, kIsTensor, kIsNotTensor, {1, 1, 2}, {1, 1, 2});
968 symmetric_test({1, 3, 2}, {1}, kIsTensor, kIsNotTensor, {1, 3, 2}, {1, 1, 1});
969 symmetric_test({1, 1, 1}, {2, 3}, kIsTensor, kIsNotTensor, {1, 1, 1},
970 {1, 2, 3});
971 symmetric_test({1, 1, 1}, {2, 3, 4}, kIsTensor, kIsNotTensor, {1, 1, 1},
972 {2, 3, 4});
973 symmetric_test({1, 1, 1}, {1, 2, 3, 4}, kIsTensor, kIsNotTensor, {1, 1, 1},
974 {2, 3, 4});
975 symmetric_test({1, 3, 4}, {1, 2, 1, 4}, kIsTensor, kIsNotTensor, {1, 3, 4},
976 {2, 1, 4});
977 symmetric_test({1, 1, 1}, {2, 1, 1, 1}, kIsTensor, kIsNotTensor, {}, {},
978 error::INVALID_ARGUMENT, "Infeasible broadcast scheme");
979 symmetric_test({1, 1, 1}, {2, 1, 1, 1}, kIsTensor, kIsNotTensor, {}, {},
980 error::INVALID_ARGUMENT, "Infeasible broadcast scheme",
981 /*operand_1_batch_size=*/2);
982 symmetric_test({1, 1, 1}, {1, 1, 1, 1, 1}, kIsTensor, kIsNotTensor, {}, {},
983 error::INVALID_ARGUMENT,
984 "Broadcasting beyond batch dimension is not supported "
985 "(tensor #dims 4 vs broadcast #dims 5)");
986 symmetric_test({3}, {1, 1, 3}, kIsTensor, kIsNotTensor, {}, {},
987 error::INVALID_ARGUMENT,
988 "Broadcasting beyond batch dimension is not supported "
989 "(tensor #dims 2 vs broadcast #dims 3)",
990 /*operand_1_batch_size=*/2);
991
992 // Both inputs are tensors.
993 symmetric_test({1, 1, 1}, {1, 1}, kIsTensor, kIsTensor, {}, {},
994 error::INVALID_ARGUMENT,
995 "Broadcasting beyond batch dimension is not supported "
996 "(tensor #dims 3 vs broadcast #dims 4)");
997 symmetric_test({1, 3}, {3}, kIsTensor, kIsTensor, {}, {},
998 error::INVALID_ARGUMENT,
999 "Broadcasting beyond batch dimension is not supported "
1000 "(tensor #dims 2 vs broadcast #dims 3)");
1001 symmetric_test({1, 3, 4}, {2, 1, 4}, kIsTensor, kIsTensor, {1, 3, 4},
1002 {2, 1, 4});
1003 symmetric_test({1, 1, 1}, {1, 1, 1, 1}, kIsTensor, kIsTensor, {}, {},
1004 error::INVALID_ARGUMENT,
1005 "Broadcasting beyond batch dimension is not supported "
1006 "(tensor #dims 4 vs broadcast #dims 5)");
1007 symmetric_test({2, 3}, {7, 5}, kIsTensor, kIsTensor, {}, {},
1008 error::INVALID_ARGUMENT, "Infeasible broadcast scheme");
1009
1010 EXPECT_THAT(converter_->network(), LayerNamesNonEmpty());
1011 }
1012
TEST_F(ConverterTest,CreateConstantLayer)1013 TEST_F(ConverterTest, CreateConstantLayer) {
1014 for (auto dtype : {nvinfer1::DataType::kFLOAT, nvinfer1::DataType::kINT32}) {
1015 TRT_ShapedWeights weights =
1016 weight_store_->GetTempWeights(dtype, CreateDims({2, 3, 5}))
1017 .ValueOrDie();
1018 ITensorProxyPtr tensor =
1019 converter_->CreateConstantLayer(weights, CreateDims({3, 10}));
1020 ASSERT_NE(nullptr, tensor->trt_tensor());
1021 EXPECT_EQ(dtype, tensor->getType())
1022 << "Expected " << DebugString(dtype) << " vs. actual "
1023 << DebugString(tensor->getType());
1024 EXPECT_THAT(tensor->getDimensions(), DimsAreArray({3, 10}));
1025 }
1026
1027 EXPECT_THAT(converter_->network(), LayerNamesNonEmpty());
1028 }
1029
1030 class ConvertGraphDefToEngineTest : public ::testing::Test {
1031 public:
RunConvertGraphDefToEngine(Scope * s)1032 Status RunConvertGraphDefToEngine(Scope* s) {
1033 GraphDef gdef;
1034 TF_EXPECT_OK(s->ToGraphDef(&gdef));
1035 std::vector<PartialTensorShape> input_shapes;
1036 int batch_size = -1;
1037 for (const NodeDef& node : gdef.node()) {
1038 absl::string_view node_name(node.name());
1039 if (absl::ConsumePrefix(&node_name, IONamePrefixes::kInputPHName)) {
1040 int port = -1;
1041 EXPECT_TRUE(absl::SimpleAtoi(node_name, &port)) << node.name();
1042 if (input_shapes.size() < port + 1) input_shapes.resize(port + 1);
1043 input_shapes[port] =
1044 PartialTensorShape(node.attr().at("shape").shape());
1045 if (batch_size == -1) {
1046 batch_size = input_shapes[port].dim_size(0);
1047 } else {
1048 EXPECT_EQ(batch_size, input_shapes[port].dim_size(0));
1049 }
1050 }
1051 }
1052 // TODO(laigd): execute the engine and get outputs.
1053 return ConvertGraphDefToEngine(
1054 gdef, /*ctx=*/nullptr, TrtPrecisionMode::FP32, /*max_batch_size=*/1,
1055 /*max_workspace_size_bytes=*/64 << 20, input_shapes, &logger_,
1056 /*allocator=*/nullptr, /*calibrator=*/nullptr, &engine_,
1057 /*use_calibration=*/false, /*use_implicit_batch=*/true,
1058 /*convert_successfully=*/nullptr, /*profiles=*/nullptr,
1059 "TRTEngineOp_000_000", /*use_explicit_precision=*/false);
1060 }
1061
1062 protected:
1063 TrtUniquePtrType<nvinfer1::ICudaEngine> engine_;
1064
1065 private:
1066 Logger& logger_ = *Logger::GetLogger();
1067 };
1068
TEST_F(ConvertGraphDefToEngineTest,IdentityGraph)1069 TEST_F(ConvertGraphDefToEngineTest, IdentityGraph) {
1070 Scope s = Scope::NewRootScope();
1071 auto input =
1072 ops::Placeholder(s.WithOpName(StrCat(IONamePrefixes::kInputPHName, 0)),
1073 DT_FLOAT, ops::Placeholder::Shape({1, 1}));
1074 auto output = ops::Identity(s.WithOpName("identity1"), input);
1075 output = ops::Identity(s.WithOpName("identity2"), output);
1076 output = ops::Identity(s.WithOpName(StrCat(IONamePrefixes::kOutputPHName, 0)),
1077 output);
1078 // If the converter marks the input tensor as output tensor, the conversion
1079 // below will fail with:
1080 // > TensorRTOutputPH_0 cannot be both input and output
1081 // > Network must have at least one output
1082 TF_EXPECT_OK(RunConvertGraphDefToEngine(&s));
1083 }
1084
1085 // Returns a vector of shapes from a vector of input tensors. This can be used
1086 // to create optimization profiles.
GetShapeFromDataVec(DataVec input_data,std::vector<TensorShape> * shape_vec)1087 Status GetShapeFromDataVec(DataVec input_data,
1088 std::vector<TensorShape>* shape_vec) {
1089 shape_vec->reserve(input_data.size());
1090 std::transform(input_data.begin(), input_data.end(),
1091 std::back_inserter(*shape_vec),
1092 [](InputOutputData x) { return x.tensor.shape(); });
1093 return Status::OK();
1094 }
1095
1096 template <typename T>
GetSpanForData(const InputOutputData & data)1097 inline absl::Span<const T> GetSpanForData(const InputOutputData& data) {
1098 const auto& tensor_map = data.tensor.flat<T>();
1099 return absl::Span<const T>(tensor_map.data(), tensor_map.size());
1100 }
1101
GetDataAsFloat(InputOutputData & data)1102 std::vector<float> GetDataAsFloat(InputOutputData& data) {
1103 const auto dType = data.tensor.dtype();
1104 if (dType == DT_FLOAT) {
1105 auto span = GetSpanForData<float>(data);
1106 return std::vector<float>(span.begin(), span.end());
1107 }
1108 if (dType == DT_HALF) {
1109 return CastVector<Eigen::half, float>(GetSpanForData<Eigen::half>(data));
1110 }
1111 if (dType == DT_INT32) {
1112 return CastVector<int32, float>(GetSpanForData<int32>(data));
1113 }
1114 #if IS_TRT_VERSION_GE(8, 2, 0, 0)
1115 if (dType == DT_BOOL) {
1116 return CastVector<bool, float>(GetSpanForData<bool>(data));
1117 }
1118 #endif
1119 LOG(FATAL) << "DataType not supported for testing " << DataTypeString(dType);
1120 return {};
1121 }
1122
1123 // Class to test various op converters, using both a TrtNodeValidator and
1124 // Converter.
1125 class OpConverterTest : public ::testing::Test {
1126 public:
OpConverterTest()1127 OpConverterTest()
1128 : tensor_buffer_allocator_(new GpuManagedAllocator()),
1129 scope_(Scope::NewRootScope()) {
1130 PreventUnloadBuilderResources();
1131 QCHECK_EQ(0, cudaStreamCreate(&stream_));
1132 Reset();
1133 }
1134
~OpConverterTest()1135 ~OpConverterTest() noexcept override {
1136 QCHECK_EQ(0, cudaStreamDestroy(stream_));
1137 }
1138
GetTensorOrWeights(const string & name,TRT_TensorOrWeights * output)1139 Status GetTensorOrWeights(const string& name, TRT_TensorOrWeights* output) {
1140 return converter_->GetTensorOrWeights(name, output);
1141 }
1142
Reset(TrtPrecisionMode precision_mode_to_test=TrtPrecisionMode::FP32,TrtTestMode trt_mode=TrtTestMode::kImplicitBatch,OpKernelContext * ctx=nullptr)1143 void Reset(TrtPrecisionMode precision_mode_to_test = TrtPrecisionMode::FP32,
1144 TrtTestMode trt_mode = TrtTestMode::kImplicitBatch,
1145 OpKernelContext* ctx = nullptr) {
1146 // Destroy existing TRT objects in a proper order.
1147 converter_.reset(nullptr);
1148 engine_.reset(nullptr);
1149
1150 // Re-create them in proper order.
1151 converter_ =
1152 std::move(Converter::Create(precision_mode_to_test,
1153 /*use_calibration=*/false, &logger_,
1154 /*use_implicit_batch=*/trt_mode ==
1155 TrtTestMode::kImplicitBatch,
1156 /*engine_name=*/"",
1157 /*use_explicit_precision=*/false, ctx)
1158 .ValueOrDie());
1159
1160 // Reset other related artifacts.
1161 scope_ = Scope::NewRootScope();
1162 }
1163
1164 // Constructs a flat tensor with 'vals' in Unified Memory.
1165 template <typename T>
AsTensor(gtl::ArraySlice<T> vals)1166 Tensor AsTensor(gtl::ArraySlice<T> vals) { // non-absl ok
1167 Tensor ret(tensor_buffer_allocator_.get(), DataTypeToEnum<T>::value,
1168 {static_cast<int64_t>(vals.size())});
1169 std::copy_n(vals.data(), vals.size(), ret.flat<T>().data());
1170 return ret;
1171 }
1172
1173 // Constructs a tensor of "shape" with values "vals" in Unified Memory.
1174 template <typename T>
AsTensor(gtl::ArraySlice<T> vals,const TensorShape & shape)1175 Tensor AsTensor(gtl::ArraySlice<T> vals, // non-absl ok
1176 const TensorShape& shape) {
1177 Tensor ret(tensor_buffer_allocator_.get(), DataTypeToEnum<T>::value,
1178 {static_cast<int64_t>(vals.size())});
1179 CHECK(ret.CopyFrom(AsTensor(vals), shape));
1180 return ret;
1181 }
1182
1183 template <typename T, typename S>
transformTensor(const std::vector<T> & vals,Tensor & ret)1184 void transformTensor(const std::vector<T>& vals, Tensor& ret) {
1185 std::transform(vals.begin(), vals.end(), ret.flat<S>().data(),
1186 [](const T in_val) -> S { return static_cast<S>(in_val); });
1187 }
1188
1189 template <typename T, typename S>
transformWeights(const std::vector<T> & vals,TRT_ShapedWeights & weights)1190 void transformWeights(const std::vector<T>& vals,
1191 TRT_ShapedWeights& weights) {
1192 std::transform(vals.begin(), vals.end(), weights.GetPointer<S>(),
1193 [](const T in_val) -> S { return static_cast<S>(in_val); });
1194 }
1195
1196 // Constructs a tensor with given values (vals). The tensor type is defined by
1197 // the tf_type argument, its shape is given by input_dims. The tensor is
1198 // constructed using the allocator of OpConverterTest in Unified Memory.
1199 template <typename T>
AsTensor(const std::vector<T> & vals,const std::vector<int> & input_dims,DataType tf_type)1200 Tensor AsTensor(const std::vector<T>& vals,
1201 const std::vector<int>& input_dims, DataType tf_type) {
1202 Tensor ret(tensor_buffer_allocator_.get(), tf_type,
1203 {static_cast<int64_t>(vals.size())});
1204 if (tf_type == DT_FLOAT) {
1205 transformTensor<T, float>(vals, ret);
1206 } else if (tf_type == DT_HALF) {
1207 transformTensor<T, Eigen::half>(vals, ret);
1208 } else if (tf_type == DT_INT32) {
1209 transformTensor<T, int32>(vals, ret);
1210 #if IS_TRT_VERSION_GE(8, 2, 0, 0)
1211 } else if (tf_type == DT_BOOL) {
1212 transformTensor<T, bool>(vals, ret);
1213 #endif
1214 } else {
1215 LOG(FATAL) << "Cannot create tensor with type "
1216 << DataTypeString(tf_type);
1217 }
1218 TensorShape shape;
1219 TF_EXPECT_OK(TensorShapeUtils::MakeShape(input_dims, &shape));
1220 CHECK(ret.CopyFrom(ret, shape));
1221 return ret;
1222 }
1223
1224 template <typename T>
AsTensor(const std::vector<int> & vals,const std::vector<int> & input_dims,DataType tf_type)1225 Tensor AsTensor(const std::vector<int>& vals,
1226 const std::vector<int>& input_dims, DataType tf_type) {
1227 const auto& conv_vals = CastVector<int, T>(vals);
1228 return AsTensor(conv_vals, input_dims, tf_type);
1229 }
1230
1231 // Constructs a flat tensor in Unified Memory.
1232 template <typename T>
ConstructTensor(int data_size,const T & value=T ())1233 Tensor ConstructTensor(int data_size, const T& value = T()) {
1234 std::vector<T> values(data_size, value);
1235 return AsTensor<T>(values);
1236 }
1237
1238 // Constructs a flat tensor in Unified Memory.
1239 template <typename T>
ConstructTensor(int data_size,const T & value,DataType tf_type)1240 Tensor ConstructTensor(int data_size, const T& value, DataType tf_type) {
1241 std::vector<T> values(data_size, value);
1242 return AsTensor<T>(values, {data_size}, tf_type);
1243 }
1244
CheckDataTypeMatches(const DataVec & datas)1245 void CheckDataTypeMatches(const DataVec& datas) {
1246 if (VLOG_IS_ON(2)) {
1247 int nbBindings = engine_->getNbBindings();
1248 VLOG(2) << "Number of engine bindings: " << nbBindings;
1249 for (int i = 0; i < nbBindings; i++) {
1250 VLOG(2) << "Binding " << i << " name: " << engine_->getBindingName(i);
1251 }
1252 }
1253 for (const auto& data : datas) {
1254 VLOG(2) << "Checking if data type matches for tensor " << data.name;
1255 const int input_index = engine_->getBindingIndex(data.name.c_str());
1256 ASSERT_NE(-1, input_index);
1257 const nvinfer1::DataType trt_dtype =
1258 engine_->getBindingDataType(input_index);
1259 DataType tf_type;
1260 TF_ASSERT_OK(TrtTypeToTfType(trt_dtype, &tf_type));
1261 ASSERT_EQ(data.tensor.dtype(), tf_type)
1262 << DataTypeString(data.tensor.dtype()) << " vs. "
1263 << DataTypeString(tf_type);
1264 }
1265 }
1266
BuildAndRun(const DataVec & input_data,DataVec * output_data,const int batch_size=1)1267 Status BuildAndRun(const DataVec& input_data, DataVec* output_data,
1268 const int batch_size = 1) {
1269 // Mark the output tensor as TRT engine output.
1270 std::vector<Converter::EngineOutputInfo> output_info;
1271 for (const auto& data : *output_data) {
1272 nvinfer1::DataType trt_type;
1273 TF_RETURN_IF_ERROR(TfTypeToTrtType(data.tensor.dtype(), &trt_type));
1274 output_info.push_back({data.name, data.name, trt_type});
1275 }
1276 TF_RETURN_IF_ERROR(converter_->RenameAndMarkOutputTensors(output_info));
1277
1278 // Build the TRT engine.
1279 if (engine_.get() != nullptr) {
1280 return errors::Internal("Engine already exists");
1281 }
1282 TrtShapeOptimizationProfile profiles;
1283 if (!converter_->use_implicit_batch()) {
1284 std::vector<bool> input_mask(input_data.size());
1285 for (int i = 0; i < input_data.size(); i++) {
1286 input_mask[i] = (input_data[i].tensor.dtype() != DataType::DT_RESOURCE);
1287 }
1288 profiles.SetInputMask(input_mask);
1289 profiles.SetShapeTensorMask(converter_->network());
1290 TF_RETURN_IF_ERROR(profiles.CollectShapeValues(input_data));
1291 // Create a single optimization profile for explicit batch mode
1292 std::vector<TensorShape> input_shapes;
1293 TF_RETURN_IF_ERROR(GetShapeFromDataVec(input_data, &input_shapes));
1294 profiles.AddShape(input_shapes);
1295 std::vector<PartialTensorShape> input_partial_shapes;
1296 TF_RETURN_IF_ERROR(
1297 GetNetworkInputShapes(converter_->network(), &input_partial_shapes));
1298 profiles.InitProfiles(input_partial_shapes,
1299 ProfileStrategy::kImplicitBatchModeCompatible);
1300 }
1301 TF_RETURN_IF_ERROR(
1302 converter_->BuildCudaEngine(&engine_,
1303 /*max_batch_size=*/batch_size,
1304 /*max_workspace_size_bytes=*/1 << 26,
1305 /*allocator=*/nullptr,
1306 /*calibrator=*/nullptr,
1307 /*profiles=*/&profiles));
1308 CHECK_NOTNULL(engine_.get());
1309 CheckDataTypeMatches(input_data);
1310 CheckDataTypeMatches(*output_data);
1311
1312 const int num_bindings = input_data.size() + output_data->size();
1313 std::vector<void*> buffers(num_bindings);
1314
1315 if (engine_->getNbBindings() != num_bindings) {
1316 return errors::Internal("Number of bindings do not match");
1317 }
1318 // Since we have only 1 optimization profile (which is enabled by default)
1319 // it is fine to create execution context directly, instead of calling
1320 // profiles.CreateExecutionContexts()
1321 TrtUniquePtrType<nvinfer1::IExecutionContext> execution_context(
1322 engine_->createExecutionContext());
1323
1324 // Prepare input bindings.
1325 TF_RETURN_IF_ERROR(
1326 SetTrtEngineInputs(engine_.get(), execution_context.get(), 0, buffers,
1327 converter_->use_implicit_batch(), batch_size,
1328 profiles, nullptr, &input_data));
1329 // Prepare output bindings.
1330 TF_RETURN_IF_ERROR(SetTrtEngineOutputs(
1331 engine_.get(), execution_context.get(), 0, buffers,
1332 converter_->use_implicit_batch(), batch_size, nullptr, output_data));
1333 // Execute the TRT engine.
1334 TF_RETURN_IF_ERROR(TrtEnqueue(execution_context.get(), buffers, stream_,
1335 converter_->use_implicit_batch(),
1336 batch_size));
1337 cudaStreamSynchronize(stream_);
1338 return Status::OK();
1339 }
1340
1341 // Adds ITensor for both validation and conversion, assuming explicit batch
1342 // dimension is included in dims (ie for an NCHW tensor dims = {N, C, H, W}).
AddTestTensorWithTFDims(const string & name,const std::vector<int32> & dims,nvinfer1::DataType trt_type=nvinfer1::DataType::kFLOAT,Status add_input_status=Status::OK ())1343 void AddTestTensorWithTFDims(
1344 const string& name, const std::vector<int32>& dims,
1345 nvinfer1::DataType trt_type = nvinfer1::DataType::kFLOAT,
1346 Status add_input_status = Status::OK()) {
1347 DataType tf_type;
1348 TF_ASSERT_OK(TrtTypeToTfType(trt_type, &tf_type));
1349 ops::Placeholder::Attrs attrs;
1350 TF_EXPECT_OK(TensorShapeUtils::MakeShape(dims, &attrs.shape_));
1351
1352 auto input = ops::Placeholder(scope_.WithOpName(name), tf_type, attrs);
1353 node_inputs_[name] = input.output;
1354
1355 // Add a real ITensor for conversion conditionally.
1356
1357 auto dims_adap =
1358 DimsAdapter::Create(attrs.shape_, converter_->use_implicit_batch());
1359 if (converter_->use_implicit_batch() && !dims_adap.ok()) {
1360 ASSERT_EQ(add_input_status, dims_adap.status());
1361 return;
1362 } else {
1363 TF_EXPECT_OK(dims_adap.status());
1364 }
1365 if (!converter_->use_implicit_batch() || dims_adap->IsStatic()) {
1366 int batch_size = dims.size() > 0 ? dims[0] : 0;
1367 Status status = converter_->AddInputTensor(
1368 name, trt_type, dims_adap->AsTrtDims(), batch_size);
1369 ASSERT_EQ(add_input_status, status);
1370 }
1371 }
1372
AddTensorOrWeights(const string & name,TRT_TensorOrWeights input)1373 Status AddTensorOrWeights(const string& name, TRT_TensorOrWeights input) {
1374 return converter_->AddTensorOrWeights(name, input);
1375 }
1376
1377 // Adds ITensor for both validation and conversion. The difference compared to
1378 // AddTestTensorWithTFDims is in the meaning of the dims parameter. To define
1379 // a tensor with NCHW shape, here we set dims = {C,H,W} and batch_size = N.
1380 // TODO(tfeher) remove this function once all test are updated to use the
1381 // other version of AddTestTensor (defined by
1382 // ParameterizedOpConverterTestBase).
AddTestTensor(const string & name,const std::vector<int32> & dims,int batch_size=1,nvinfer1::DataType trt_dtype=nvinfer1::DataType::kFLOAT)1383 void AddTestTensor(
1384 const string& name, const std::vector<int32>& dims, int batch_size = 1,
1385 nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT) {
1386 DimsAdapter adap(dims);
1387 std::vector<int32_t> dims_vec;
1388 TF_CHECK_OK(adap.Prepend(batch_size).Vector(&dims_vec));
1389 AddTestTensorWithTFDims(name, dims_vec, trt_dtype);
1390 if (adap.IsStatic()) {
1391 ASSERT_EQ(batch_size, converter_->batch_size_);
1392 }
1393 }
1394
1395 // Adds weights for both validation and conversion. The type of the weight is
1396 // determined by tf_type. The initial value vector (values) can have any
1397 // type (T) that can be statically casted to tf_type.
1398 template <typename T = int32>
AddTestWeights(const string & name,const std::vector<int> & dims,const std::vector<T> & values_inp,DataType tf_type,bool fix_values=true)1399 void AddTestWeights(const string& name, const std::vector<int>& dims,
1400 const std::vector<T>& values_inp, DataType tf_type,
1401 bool fix_values = true) {
1402 const DimsAdapter dims_adap(dims);
1403 const int64_t num_elements = dims_adap.Volume();
1404
1405 std::vector<T> values(values_inp);
1406 if (num_elements != values.size()) {
1407 if (fix_values) {
1408 AdjustVectorByDims<T>(values, num_elements, name, "AddTestWeights");
1409 } else {
1410 FAIL() << "Unable to create test weights: "
1411 << (num_elements > values.size() ? "not enough" : "to many")
1412 << " values specified: " << values.size() << " vs. "
1413 << num_elements << " defined by dims";
1414 }
1415 }
1416 // Add weights for validation.
1417 Tensor t = AsTensor<T>(values, dims, tf_type);
1418 node_inputs_[name] = ops::Const(scope_.WithOpName(name), t);
1419
1420 // Add weights for conversion.
1421 nvinfer1::DataType dtype;
1422 TF_ASSERT_OK(TfTypeToTrtType(tf_type, &dtype));
1423 QCHECK_EQ(num_elements, values.size())
1424 << num_elements << " vs " << values.size();
1425 TRT_ShapedWeights weights(dtype);
1426 if (num_elements) {
1427 weights =
1428 converter_->weight_store_.GetTempWeights(dtype, dims_adap.AsTrtDims())
1429 .value();
1430
1431 if (tf_type == DT_FLOAT) {
1432 transformWeights<T, float>(values, weights);
1433 } else if (tf_type == DT_HALF) {
1434 transformWeights<T, Eigen::half>(values, weights);
1435 } else if (tf_type == DT_INT32) {
1436 transformWeights<T, int32>(values, weights);
1437 #if IS_TRT_VERSION_GE(8, 2, 0, 0)
1438 } else if (tf_type == DT_BOOL) {
1439 transformWeights<T, bool>(values, weights);
1440 #endif
1441 } else {
1442 LOG(FATAL) << "Cannot create tensor with type "
1443 << DataTypeString(tf_type);
1444 }
1445 }
1446 TF_EXPECT_OK(
1447 converter_->AddTensorOrWeights(name, TRT_TensorOrWeights{weights}));
1448 }
1449
1450 // Adds test weight without specifying tf_type arg. In this case the initial
1451 // value type (T) will determine the type of the weights.
1452 template <typename T = int32>
AddTestWeights(const string & name,const std::vector<int> & dims,const std::vector<T> & value,bool fix_values=true)1453 void AddTestWeights(const string& name, const std::vector<int>& dims,
1454 const std::vector<T>& value, bool fix_values = true) {
1455 AddTestWeights(name, dims, value, DataTypeToEnum<T>::value, fix_values);
1456 }
1457
1458 // Test validation in validation-only mode.
RunValidation(const Node * node)1459 Status RunValidation(const Node* node) {
1460 grappler::GrapplerItem item;
1461 TF_EXPECT_OK(scope_.ToGraphDef(&item.graph));
1462 grappler::GraphProperties graph_properties(item);
1463 TF_EXPECT_OK(graph_properties.InferStatically(true));
1464
1465 TrtNodeValidator validator(
1466 graph_properties, converter_->precision_mode(),
1467 /*use_calibration=*/false,
1468 /*use_implicit_batch=*/converter_->use_implicit_batch(),
1469 /*use_explicit_precision=*/false);
1470 return validator.IsTensorRTCandidate(node);
1471 }
1472
RunConversion(const Node * node,error::Code expected_code=error::OK,const std::string & expected_msg_substr="")1473 void RunConversion(const Node* node, error::Code expected_code = error::OK,
1474 const std::string& expected_msg_substr = "") {
1475 EXPECT_THAT(converter_->ConvertNode(node->def()),
1476 StatusIs(expected_code, HasSubstr(expected_msg_substr)));
1477 if (expected_code == error::OK) {
1478 EXPECT_THAT(converter_->network(), LayerNamesNonEmpty());
1479 }
1480 }
1481
1482 // Helper method to run both validation and conversion, when the expected
1483 // output are same.
RunValidationAndConversion(const NodeDef & node_def,error::Code expected_code=error::OK,const std::string & expected_msg_substr="",bool should_run_conversion=true)1484 void RunValidationAndConversion(const NodeDef& node_def,
1485 error::Code expected_code = error::OK,
1486 const std::string& expected_msg_substr = "",
1487 bool should_run_conversion = true) {
1488 // Add the node to the graph.
1489 // TODO(laigd): we should accept a function that adds the node using
1490 // `scope_`, so individual test case can reuse the scope object and we don't
1491 // need to add the edges here by ourselves.
1492 Graph* graph = scope_.graph();
1493 Status status;
1494 Node* node = graph->AddNode(std::move(node_def), &status);
1495 TF_EXPECT_OK(status);
1496 for (int i = 0; i < node_def.input().size(); ++i) {
1497 const string& input_name = node_def.input(i);
1498 const auto& itr = node_inputs_.find(input_name);
1499 QCHECK(itr != node_inputs_.end());
1500 const Output& input = itr->second;
1501 graph->AddEdge(input.node(), input.index(), node, i);
1502 }
1503
1504 status = RunValidation(node);
1505 if (should_run_conversion && status.ok()) {
1506 RunConversion(node, expected_code, expected_msg_substr);
1507 } else {
1508 EXPECT_THAT(status,
1509 StatusIs(expected_code, HasSubstr(expected_msg_substr)));
1510 }
1511 }
1512
1513 // Helper method to run both validation and conversion, and check the output
1514 // shapes.
RunValidationAndConversion(const NodeDef & node_def,const Status & status,const std::string & output_name,const std::vector<std::vector<int>> & exp_out_dims)1515 void RunValidationAndConversion(
1516 const NodeDef& node_def, const Status& status,
1517 const std::string& output_name,
1518 const std::vector<std::vector<int>>& exp_out_dims) {
1519 RunValidationAndConversion(node_def, status.code(), status.error_message(),
1520 true);
1521
1522 if (status.ok()) {
1523 // TODO(tfeher): Enable this check in explicit_batch_mode.
1524 // In dynamic shape mode the output dims cannot be tested here. In that
1525 // case we need to wait for the concrate input shapes to be defined (by
1526 // setBindingDimensions before enqueue) before we can check the output
1527 // dims.
1528 if (converter_->use_implicit_batch()) {
1529 for (int i = 0; i < exp_out_dims.size(); i++) {
1530 TRT_TensorOrWeights output;
1531 string name = i == 0 ? output_name : StrCat(output_name, ":", i);
1532 TF_EXPECT_OK(GetTensorOrWeights(name.c_str(), &output));
1533 ASSERT_TRUE(output.is_tensor());
1534 if (!exp_out_dims[i].empty()) {
1535 // Removing batch dim.
1536 auto out_dims = std::vector<int>(exp_out_dims[i].begin() + 1,
1537 exp_out_dims[i].end());
1538 VLOG(2) << "Testing output shape for tensor " << name;
1539 EXPECT_THAT(output.tensor()->getDimensions(),
1540 DimsAreArray(out_dims));
1541 }
1542 }
1543 }
1544 }
1545 }
1546
1547 // Expose quantization_ranges_ for tests
quantization_ranges_proxy()1548 std::unordered_map<ITensorProxyPtr*, float>& quantization_ranges_proxy() {
1549 return converter_->quantization_ranges_proxy_;
1550 }
1551
1552 // Expose quantization_ranges_ for tests
quantization_ranges()1553 std::unordered_map<nvinfer1::ITensor*, float>& quantization_ranges() {
1554 return converter_->quantization_ranges_;
1555 }
1556
1557 protected:
1558 template <typename T>
AdjustVectorByDims(std::vector<T> & values,size_t num_elements,const string & name,const char * callingFunc)1559 void AdjustVectorByDims(std::vector<T>& values, size_t num_elements,
1560 const string& name, const char* callingFunc) {
1561 const auto old_size = values.size();
1562 if (num_elements > old_size) {
1563 // Expending vector with 0's.
1564 const std::vector<T> zeros(num_elements - old_size, 0);
1565 values.reserve(num_elements);
1566 values.insert(values.end(), zeros.begin(), zeros.end());
1567 VLOG(2) << "In function " << callingFunc << " the vector '" << name
1568 << "' was extended by " << num_elements - old_size << " zeros";
1569 } else {
1570 // Removing unnecessary elements.
1571 values.resize(num_elements);
1572 VLOG(2) << "Only first " << num_elements << " out of " << old_size
1573 << " elements of the vector '" << name
1574 << "' will be used in function" << callingFunc;
1575 }
1576 }
1577
1578 public:
1579 std::unique_ptr<Converter> converter_;
1580
1581 protected:
1582 Logger& logger_ = *Logger::GetLogger();
1583
1584 private:
1585 TrtUniquePtrType<nvinfer1::ICudaEngine> engine_;
1586 cudaStream_t stream_;
1587 std::unique_ptr<Allocator> tensor_buffer_allocator_;
1588
1589 public:
1590 // The scope that contains the graph being converted. Because
1591 // tensor_buffer_allocator_ provides the storage for tensor contents that are
1592 // represented as attributes for graph nodes within scope_,
1593 // tensor_buffer_allocator_ needs to be available when destructing scope_.
1594 // Therefore, scope_ comes after tensor_buffer_allocator_ in the class member
1595 // field list.
1596 Scope scope_;
1597
1598 protected:
1599 std::unordered_map<string, Output> node_inputs_;
1600 };
1601
1602 // Extends the OpConverterTest for variable converters which require a properly
1603 // setup context.
1604 class VariableOpConverterTest : public OpConverterTest {
1605 public:
Reset(TrtPrecisionMode precision_mode_to_test=TrtPrecisionMode::FP32,TrtTestMode trt_mode=TrtTestMode::kImplicitBatch)1606 void Reset(TrtPrecisionMode precision_mode_to_test = TrtPrecisionMode::FP32,
1607 TrtTestMode trt_mode = TrtTestMode::kImplicitBatch) {
1608 OpConverterTest::Reset(precision_mode_to_test, trt_mode, context_.get());
1609 }
1610
CreateContext(const NodeDef & node_def,OpKernel ** kernel,OpKernelContext ** context)1611 void CreateContext(const NodeDef& node_def, OpKernel** kernel,
1612 OpKernelContext** context) {
1613 std::unique_ptr<Device> device_(
1614 DeviceFactory::NewDevice("GPU", {}, "/job:a/replica:0/task:0"));
1615 Device* device_ptr = device_.get();
1616
1617 device_mgr_ = std::make_unique<StaticDeviceMgr>(std::move(device_));
1618
1619 managed_allocator_ = std::make_unique<GpuManagedAllocator>();
1620 Allocator* allocator = managed_allocator_.get();
1621 step_container_ =
1622 std::make_unique<ScopedStepContainer>(0, [](const string&) {});
1623 slice_reader_cache_wrapper_ =
1624 std::make_unique<checkpoint::TensorSliceReaderCacheWrapper>();
1625
1626 flib_def_ = std::make_unique<FunctionLibraryDefinition>(
1627 OpRegistry::Global(), FunctionDefLibrary{});
1628
1629 thread_pool_ =
1630 std::make_unique<thread::ThreadPool>(Env::Default(), "default",
1631 /*num_threads=*/1);
1632 pflr_ = std::make_unique<ProcessFunctionLibraryRuntime>(
1633 device_mgr_.get(), Env::Default(), /*config=*/nullptr,
1634 TF_GRAPH_DEF_VERSION, flib_def_.get(), OptimizerOptions(),
1635 thread_pool_.get());
1636
1637 FunctionLibraryRuntime* flib = pflr_->GetFLR(device_ptr->name());
1638 ResourceMgr* resource_mgr = device_ptr->resource_manager();
1639
1640 TF_CHECK_OK(NodeProperties::CreateFromNodeDef(
1641 node_def, OpRegistry::Global(), &props_));
1642
1643 OpKernel* kernel_ptr = nullptr;
1644 TF_CHECK_OK(CreateOpKernel(DEVICE_GPU, device_ptr, allocator, flib,
1645 resource_mgr, props_, TF_GRAPH_DEF_VERSION,
1646 &kernel_ptr));
1647 op_kernel_ = std::unique_ptr<OpKernel>(kernel_ptr);
1648
1649 auto* dev_info = device_ptr->tensorflow_accelerator_device_info();
1650 CHECK_NOTNULL(dev_info);
1651 DeviceContext* device_context = dev_info->default_context;
1652
1653 // Note: this setup is not exhaustive.
1654 params_.device = device_ptr;
1655 params_.op_kernel = op_kernel_.get();
1656 params_.resource_manager = resource_mgr;
1657 params_.frame_iter = FrameAndIter(0, 0);
1658 params_.inputs = inputs_;
1659 params_.step_container = step_container_.get();
1660 params_.function_library = flib;
1661 params_.slice_reader_cache = slice_reader_cache_wrapper_.get();
1662 params_.op_device_context = device_context;
1663
1664 context_ = std::make_unique<OpKernelContext>(¶ms_);
1665
1666 // Outputs.
1667 *kernel = op_kernel_.get();
1668 *context = context_.get();
1669 }
1670
1671 // Adds resource for resource variable op converters.
AddTestResource(const string & name,const ResourceHandle & resource)1672 void AddTestResource(const string& name, const ResourceHandle& resource) {
1673 // Add resource for validation.
1674 node_inputs_[name] =
1675 ops::Placeholder(scope_.WithOpName("my_handle"), DT_RESOURCE);
1676
1677 // Add resource for conversion.
1678 TF_EXPECT_OK(AddTensorOrWeights(name, TRT_TensorOrWeights{resource}));
1679 }
1680
1681 private:
1682 // The following pointers manage the kernel context.
1683 std::unique_ptr<DeviceMgr> device_mgr_;
1684 std::unique_ptr<Allocator> managed_allocator_;
1685 std::unique_ptr<ScopedStepContainer> step_container_;
1686 std::unique_ptr<checkpoint::TensorSliceReaderCacheWrapper>
1687 slice_reader_cache_wrapper_;
1688 std::unique_ptr<FunctionLibraryDefinition> flib_def_;
1689 std::unique_ptr<thread::ThreadPool> thread_pool_;
1690 std::unique_ptr<ProcessFunctionLibraryRuntime> pflr_;
1691 OpKernelContext::Params params_;
1692 std::unique_ptr<OpKernel> op_kernel_;
1693 std::unique_ptr<OpKernelContext> context_;
1694 std::shared_ptr<const NodeProperties> props_;
1695 absl::InlinedVector<TensorValue, 4> inputs_;
1696 };
1697
1698 // General test parameters to be used with ops that take a single input tensor.
1699 struct TestParamBase {
1700 // Concrete input dimensions for the test (including the batch dim)
1701 std::vector<int> input_dims;
1702
1703 // Dimensions to define an input with PartialTensorShape. This can be used to
1704 // define networks with dynamic input shape. It can be left empty, in that
1705 // case AddTestTensor sets partial shapes that are appropriate to TrtTestMode.
1706 std::vector<int> partial_input_dims;
1707
1708 // Concrete (static) output dimensions, including batch size as first dim
1709 std::vector<int> expected_output_dims;
1710
1711 // Parameter vector, has converter specific meaning.
1712 std::vector<int> param;
1713
1714 // Expected status of conversion (with concrete error message)
1715 Status status;
1716
1717 // Expected status of BuildAndRun
1718 Status runtime_status;
1719 };
1720
operator <<(std::ostream & os,const TestParamBase & p)1721 std::ostream& operator<<(std::ostream& os, const TestParamBase& p) {
1722 os << "input_dims" << PrintToString(p.input_dims);
1723 if (!p.partial_input_dims.empty()) {
1724 os << ", partial_input_dims" << PrintToString(p.partial_input_dims);
1725 }
1726 if (!p.expected_output_dims.empty()) {
1727 os << ", exp_out_dims" << PrintToString(p.expected_output_dims);
1728 }
1729 if (!p.param.empty()) {
1730 os << ", param" << PrintToString(p.param);
1731 }
1732 os << ", " << p.status;
1733 return os;
1734 }
1735
1736 // Printing vector with the numbers of type T which defines tensor or shape.
1737 template <typename T>
get_debug_string_for_vector(const std::vector<T> & vector,absl::string_view pComment,absl::string_view name,absl::string_view type="")1738 const std::string get_debug_string_for_vector(const std::vector<T>& vector,
1739 absl::string_view pComment,
1740 absl::string_view name,
1741 absl::string_view type = "") {
1742 const std::string t1 = absl::StrCat(pComment, name, " Dims(nbDims=");
1743 const std::string t2 = absl::StrJoin(vector, ",");
1744 const std::string t3 = type != "" ? absl::StrCat(") of type ", type) : ")";
1745 std::stringstream stream;
1746 stream << t1 << vector.size() << ", d=" << t2 << t3;
1747 return stream.str();
1748 }
1749
1750 // Parameterized version of OpConverterTest. We have the following parameters:
1751 // 1. TrtTestMode: implicit batch, explicit batch, dynamic shape modes
1752 // 2. DataType of the input TF tensors: DT_FLOAT, DT_HALF, DT_INT32
1753 // 3. TrtPrecisionMode argument for the Converter: FP32, FP16, INT8
1754 // We will introduce subclasses that will be instantiated using different
1755 // combinations of the DataType and TrtPrecisionMode parameters.
1756 class ParameterizedOpConverterTestBase
1757 : public OpConverterTest,
1758 public ::testing::WithParamInterface<
1759 std::tuple<TrtTestMode, DataType, TrtPrecisionMode>> {
1760 public:
ParameterizedOpConverterTestBase()1761 ParameterizedOpConverterTestBase()
1762 : trt_mode_(std::get<0>(GetParam())),
1763 tf_type_(std::get<1>(GetParam())),
1764 converter_precision_(std::get<2>(GetParam())) {
1765 LOG(INFO) << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%";
1766 LOG(INFO) << "tf_type_: " << DebugString(tf_type_);
1767 LOG(INFO) << "trt_mode_: " << DebugString(trt_mode_);
1768 LOG(INFO) << "converter_precision_: " << DebugString(converter_precision_);
1769 LOG(INFO) << "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%";
1770 }
1771
Reset()1772 void Reset() {
1773 OpConverterTest::Reset(converter_precision_, trt_mode_);
1774 input_data_.clear();
1775 }
1776
Reset(TrtPrecisionMode precision)1777 void Reset(TrtPrecisionMode precision) {
1778 OpConverterTest::Reset(precision, trt_mode_);
1779 input_data_.clear();
1780 }
1781
1782 // Getters of protected attributes
get_tf_type()1783 DataType get_tf_type() { return tf_type_; }
get_trt_mode()1784 TrtTestMode get_trt_mode() { return trt_mode_; }
get_converter_precision()1785 TrtPrecisionMode get_converter_precision() { return converter_precision_; }
1786
1787 // Adds an input ITensor for TRT network. Also creates the corresponding TF
1788 // tensor, and stores it in the list of inputs (input_data_).
1789 //
1790 // The TF tensor is always created with concrete static input shape given by
1791 // dims. The ITensor can have static or dynamic shape based on the trt_mode
1792 // attribute. The ITensor shape is set automatically according to the trt_mode
1793 // parameter, unless the user overrides it with an explicit
1794 // partial_input_shape_dims argument.
1795 //
1796 // Parameters:
1797 // - name of the input node
1798 // - dims actual dimensions of the tensor that we will use during the test
1799 // (including explicit batch dim)
1800 // - values initial values for the TF tensor
1801 // - dtype data type of the tensor
1802 // - partial_input_shape dimensions which can include unknown shapes. This can
1803 // be empty, in that case the partial_input_shape will be set automatically
1804 // depending on the trt_mode argument. (This argument also includes explicit
1805 // batch dim).
1806 // - add_input_status adding ITensor to the network can fail in implicit batch
1807 // mode if the batch size is inconsistent. Using the add_input_status arg we
1808 // can test such errors.
1809 //
1810 template <typename T = int>
AddTestTensor(const string & name,const std::vector<int32> & dims,DataType tf_type,const std::vector<T> & values_inp,const std::vector<int32> & partial_input_shape_dims={},Status add_input_status=Status::OK (),bool fix_values=true)1811 void AddTestTensor(const string& name, const std::vector<int32>& dims,
1812 DataType tf_type, const std::vector<T>& values_inp,
1813 const std::vector<int32>& partial_input_shape_dims = {},
1814 Status add_input_status = Status::OK(),
1815 bool fix_values = true) {
1816 std::vector<T> values(values_inp);
1817 VLOG(2) << "**** AddTestTensor for " << name
1818 << " ***** dims empty() = " << dims.empty()
1819 << " tf_type = " << DebugString(tf_type);
1820 if (!dims.empty()) {
1821 const auto num_elements = std::accumulate(
1822 std::begin(dims), std::end(dims), 1, std::multiplies<double>());
1823 if (!values.empty() && num_elements != values.size()) {
1824 if (fix_values) {
1825 AdjustVectorByDims(values, num_elements, name, "AddTestTensor");
1826 } else {
1827 // Note: for conversion only tests, it is valid to have empty values,
1828 // otherwise the number of elements should match.
1829 LOG(WARNING) << "Expected Test Tensor Shape: " << DebugString(dims)
1830 << ", Received Input Tensor: " << DebugString(values);
1831 }
1832 }
1833 }
1834
1835 std::vector<int32> partial_shape;
1836 if (!partial_input_shape_dims.empty()) {
1837 partial_shape = partial_input_shape_dims;
1838 } else {
1839 if (trt_mode_ == TrtTestMode::kDynamicShape) {
1840 // In dynamic shape mode we make all dims unknown.
1841 partial_shape = std::vector<int32>(dims.size(), -1);
1842 } else {
1843 // Use static (known) input shapes.
1844 partial_shape = dims;
1845 }
1846 if (VLOG_IS_ON(2)) {
1847 VLOG(2) << get_debug_string_for_vector(
1848 partial_shape, "Using partial_shape: for ", name);
1849 }
1850 }
1851 nvinfer1::DataType trt_type;
1852 TF_ASSERT_OK(TfTypeToTrtType(tf_type, &trt_type));
1853 AddTestTensorWithTFDims(name, partial_shape, trt_type, add_input_status);
1854 if (!values.empty()) {
1855 if (VLOG_IS_ON(2)) {
1856 VLOG(2) << get_debug_string_for_vector(
1857 values, "Adding test tensor: for ", name, DataTypeString(tf_type));
1858 }
1859 InputOutputData data{name, AsTensor(values, dims, tf_type)};
1860 VLOG(2) << "Added tensor: " << data.name << " with dtype "
1861 << DataTypeString(data.tensor.dtype());
1862 input_data_.push_back(data);
1863 }
1864 }
1865
1866 // Adds test tensor (same as above) but with the default tf_type defined by
1867 // the test params.
1868 template <typename T = int>
AddTestTensor(const string & name,const std::vector<int32> & dims,const std::vector<T> & values={},const std::vector<int32> & partial_input_shape_dims={})1869 void AddTestTensor(const string& name, const std::vector<int32>& dims,
1870 const std::vector<T>& values = {},
1871 const std::vector<int32>& partial_input_shape_dims = {}) {
1872 AddTestTensor<T>(name, dims, tf_type_, values, partial_input_shape_dims);
1873 }
1874
1875 // Builds and runs the converted network. Checks output tensor shape. Tests
1876 // output values using a matcher. The network can have multiple input and
1877 // output tensors. The inputs are defined by the input_data_ member variable.
BuildAndRun(const string & name,const std::vector<std::vector<int>> & expected_output_dims,const Status & expected_runtime_status,const std::vector<Matcher<std::vector<float>>> & matcher,const std::vector<DataType> & out_tf_types={})1878 void BuildAndRun(const string& name,
1879 const std::vector<std::vector<int>>& expected_output_dims,
1880 const Status& expected_runtime_status,
1881 const std::vector<Matcher<std::vector<float>>>& matcher,
1882 const std::vector<DataType>& out_tf_types = {}) {
1883 TensorShape shape;
1884 const int n_output = expected_output_dims.size();
1885 ASSERT_EQ(n_output, matcher.size());
1886 DataVec output_data;
1887 for (int i = 0; i < n_output; i++) {
1888 TF_EXPECT_OK(
1889 TensorShapeUtils::MakeShape(expected_output_dims[i], &shape));
1890 string out_name = (i == 0) ? name : StrCat(name, ":", i);
1891 DataType out_tf_type =
1892 out_tf_types.size() > i ? out_tf_types[i] : tf_type_;
1893 InputOutputData data{
1894 out_name, ConstructTensor(shape.num_elements(), 0, out_tf_type)};
1895 output_data.push_back(data);
1896 }
1897 const int batch_size =
1898 input_data_.empty() ||
1899 TensorShapeUtils::IsScalar(input_data_[0].tensor.shape())
1900 ? 1
1901 : input_data_[0].tensor.shape().dim_size(0);
1902 Status stat =
1903 OpConverterTest::BuildAndRun(input_data_, &output_data, batch_size);
1904 ASSERT_EQ(expected_runtime_status.ok(), stat.ok())
1905 << "expected status: " << expected_runtime_status
1906 << ", actual status: " << stat;
1907 if (expected_runtime_status.ok() && stat.ok()) {
1908 for (int i = 0; i < n_output; i++) {
1909 // Check the shape of the actual output tensors
1910 TF_EXPECT_OK(
1911 TensorShapeUtils::MakeShape(expected_output_dims[i], &shape));
1912 EXPECT_TRUE(output_data[i].tensor.shape() == shape)
1913 << "Expected shape: " << shape.DebugString() << ", actual shape: "
1914 << output_data[i].tensor.shape().DebugString();
1915 EXPECT_THAT(GetDataAsFloat(output_data[i]), matcher[i]);
1916 }
1917 }
1918 }
1919
1920 // Runs validation and conversion. If conversion is successfull then builds
1921 // the TRT network, executes it and checks the output. Handles multiple output
1922 // tensors.
TestOpConverterMultiOut(const string & name,const NodeDef node_def,const std::vector<std::vector<int>> & expected_output_dims,const Status & expected_conversion_status,const Status & expected_runtime_status,const std::vector<Matcher<std::vector<float>>> & matcher,const std::vector<DataType> & out_tf_type={})1923 void TestOpConverterMultiOut(
1924 const string& name, const NodeDef node_def,
1925 const std::vector<std::vector<int>>& expected_output_dims,
1926 const Status& expected_conversion_status,
1927 const Status& expected_runtime_status,
1928 const std::vector<Matcher<std::vector<float>>>& matcher,
1929 const std::vector<DataType>& out_tf_type = {}) {
1930 RunValidationAndConversion(node_def, expected_conversion_status, name,
1931 expected_output_dims);
1932 if (expected_conversion_status.ok()) {
1933 BuildAndRun(name, expected_output_dims, expected_runtime_status, matcher,
1934 out_tf_type);
1935 }
1936 }
1937
1938 // Runs validation and conversion. If conversion is successfull then builds
1939 // the TRT network, executes it and checks the output.
TestOpConverter(const string & name,const NodeDef node_def,const std::vector<int> & expected_output_dims,const Status & expected_conversion_status,const Status & expected_runtime_status,const Matcher<std::vector<float>> & matcher,const std::vector<DataType> & out_tf_types={})1940 void TestOpConverter(const string& name, const NodeDef node_def,
1941 const std::vector<int>& expected_output_dims,
1942 const Status& expected_conversion_status,
1943 const Status& expected_runtime_status,
1944 const Matcher<std::vector<float>>& matcher,
1945 const std::vector<DataType>& out_tf_types = {}) {
1946 TestOpConverterMultiOut(
1947 name, node_def, std::vector<std::vector<int>>({expected_output_dims}),
1948 expected_conversion_status, expected_runtime_status,
1949 std::vector<Matcher<std::vector<float>>>({matcher}), out_tf_types);
1950 }
1951
1952 protected:
1953 const TrtTestMode trt_mode_;
1954 const DataType tf_type_;
1955 const TrtPrecisionMode converter_precision_;
1956 DataVec input_data_;
1957 };
1958
1959 template <typename T>
1960 class OpConverter_UnaryTest : public ParameterizedOpConverterTestBase {
1961 public:
1962 template <typename S>
RunTests(const string & testName,const OperationMap<S> & map,std::map<std::string,std::pair<std::function<NodeDef (DataType)>,T (*)(T)>> & op_map,const std::vector<T> input_values,const std::string input_name="input",float max_abs_error=0.0001,bool nan_sensitive=true)1963 void RunTests(
1964 const string& testName, const OperationMap<S>& map,
1965 std::map<std::string,
1966 std::pair<std::function<NodeDef(DataType)>, T (*)(T)>>& op_map,
1967 const std::vector<T> input_values, const std::string input_name = "input",
1968 float max_abs_error = 0.0001, bool nan_sensitive = true) {
1969 // Prepare test parameters.
1970 auto p = TestParamBase{
1971 {1, 1, 2, 3}, // input dims
1972 {}, // input partial dims
1973 {1, 1, 2, 3}, // expected output dims
1974 };
1975
1976 // Get list of ops to test.
1977 std::vector<string> ops_to_test;
1978 for (auto& pair : map) {
1979 ops_to_test.push_back(pair.first);
1980 }
1981
1982 for (const string& op_name : ops_to_test) {
1983 SCOPED_TRACE(op_name);
1984 if (!op_map.count(op_name)) {
1985 FAIL() << testName << " op test map does not contain op " << op_name;
1986 }
1987
1988 const DataType tf_type = get_tf_type();
1989 const NodeDef& node_def = op_map[op_name].first(tf_type);
1990 runExpectedToFailTest(node_def, input_name, input_values, op_name);
1991
1992 Status conv_status = Status::OK();
1993 if (trt_mode_ == TrtTestMode::kImplicitBatch &&
1994 (op_name == "Sign" || op_name == "Round" ||
1995 op_name == "LogicalNot")) {
1996 conv_status =
1997 errors::Unimplemented("Unary op: '", op_name,
1998 "' is not supported in implicit batch mode");
1999 }
2000
2001 Reset();
2002 const DataType input_tf_type = op_name == "Cast" ? DT_HALF : tf_type;
2003 const DataType output_tf_type = op_name == "Cast" ? DT_FLOAT : tf_type;
2004
2005 AddTestTensor("input", p.input_dims, input_tf_type, input_values);
2006
2007 std::vector<float> output;
2008 std::transform(input_values.begin(), input_values.end(),
2009 std::back_inserter(output), op_map[op_name].second);
2010
2011 TestOpConverter("my_unary", node_def, p.expected_output_dims, conv_status,
2012 Status::OK(),
2013 ArrayFloatNear(output, max_abs_error, nan_sensitive),
2014 {output_tf_type});
2015 }
2016 }
runExpectedToFailTest(const NodeDef & node_def,const std::string & input_name,const std::vector<T> & input_values,const std::string & op_name)2017 void runExpectedToFailTest(const NodeDef& node_def,
2018 const std::string& input_name,
2019 const std::vector<T>& input_values,
2020 const std::string& op_name) {
2021 // Input is weights, should fail.
2022 Reset();
2023 std::string error =
2024 "The input \"" + input_name + "\" for " + op_name + " must be a tensor";
2025 AddTestWeights("input", {1, 2, 3}, input_values, get_tf_type());
2026 RunValidationAndConversion(node_def, error::UNIMPLEMENTED, error);
2027
2028 // Input has 0 dimensions, should fail.
2029 Reset();
2030 std::vector<int32> dims = {};
2031 if (trt_mode_ == TrtTestMode::kImplicitBatch) {
2032 dims = {1};
2033 }
2034 error = "At least 1 dimension is required for UNARY operation '" + op_name +
2035 "'";
2036 AddTestTensor("input", dims);
2037 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT, error);
2038 }
2039 };
2040
2041 template <typename T>
2042 class OpConverter_BinaryTest : public ParameterizedOpConverterTestBase {
2043 public:
2044 template <typename S>
RunTests(const OperationMap<S> & map,std::map<std::string,std::pair<std::function<NodeDef (DataType)>,std::vector<T>>> & op_test_info,const std::vector<std::vector<T>> & data)2045 void RunTests(
2046 const OperationMap<S>& map,
2047 std::map<std::string,
2048 std::pair<std::function<NodeDef(DataType)>, std::vector<T>>>&
2049 op_test_info,
2050 const std::vector<std::vector<T>>& data) {
2051 const std::vector<DataType> bool_types{DT_BOOL}, default_types{};
2052 std::vector<string> logical_ops{"Greater", "Less", "Equal"};
2053 std::vector<string> combined_ops{"GreaterEqual", "LessEqual"};
2054 const DataType tf_type = get_tf_type();
2055 AttrValue dtype;
2056 dtype.set_type(tf_type);
2057 std::map<std::string, NodeDef> nodes;
2058 for (const auto op_name : combined_ops) {
2059 nodes[op_name] = MakeNodeDef("my_binary", op_name, {"input1", "input2"},
2060 {{"T", dtype}});
2061 }
2062
2063 for (auto& iter : map) {
2064 const string& op_name = iter.first;
2065 if (!op_test_info.count(op_name)) {
2066 FAIL() << "Binary op test map does not contain op " << op_name;
2067 }
2068 const auto comb_op = find_name(op_name, combined_ops);
2069 const auto& node_def =
2070 comb_op ? nodes[op_name] : op_test_info[op_name].first(tf_type);
2071
2072 for (const bool operand_1_is_tensor : {true, false}) {
2073 for (const bool operand_2_is_tensor : {true, false}) {
2074 SCOPED_TRACE(StrCat(op_name, "_", operand_1_is_tensor ? "T" : "W",
2075 operand_2_is_tensor ? "T" : "W"));
2076 Reset();
2077 if (!operand_1_is_tensor && !operand_2_is_tensor) {
2078 // In that case the only test which should be launched is in
2079 // runExpectedToFailTest
2080 runExpectedToFailTest(op_name, node_def);
2081 continue;
2082 }
2083
2084 const bool logical_op = comb_op || find_name(op_name, logical_ops);
2085 auto conv_status = Status::OK();
2086 if (tf_type == DT_BOOL || logical_op) {
2087 if (trt_mode_ == TrtTestMode::kImplicitBatch) {
2088 conv_status = errors::Unimplemented(
2089 "Binary op: '", op_name,
2090 "' is not supported in implicit batch mode");
2091 } else if (!logical_op &&
2092 (!operand_1_is_tensor || !operand_2_is_tensor)) {
2093 conv_status = errors::InvalidArgument(
2094 "Both inputs of '", op_name, "' are expected to be tensors");
2095 }
2096 }
2097
2098 if (operand_1_is_tensor) {
2099 AddTestTensor("input1", {2, 1, 2}, data[0]);
2100 } else {
2101 AddTestWeights("input1", {1, 2}, data[1], tf_type);
2102 }
2103 if (operand_2_is_tensor) {
2104 AddTestTensor("input2", {2, 2, 1}, data[2]);
2105 } else {
2106 AddTestWeights("input2", {2, 1}, data[3], tf_type);
2107 }
2108
2109 TestOpConverter("my_binary", node_def, {2, 2, 2}, conv_status,
2110 Status::OK(),
2111 ElementsAreArray(op_test_info[op_name].second),
2112 logical_op ? bool_types : default_types);
2113 }
2114 }
2115 }
2116 }
2117
runExpectedToFailTest(const std::string & op_name,const NodeDef & node)2118 void runExpectedToFailTest(const std::string& op_name, const NodeDef& node) {
2119 AddTestWeights("input1", {1}, {1}, tf_type_);
2120 AddTestWeights("input2", {1}, {1}, tf_type_);
2121 const string error =
2122 "Constant folding is falled back to TensorFlow, "
2123 "binary op '" +
2124 op_name + "' received both input as constant";
2125 RunValidationAndConversion(node, error::UNIMPLEMENTED, error);
2126 }
2127 };
2128
2129 // Op converter test in FP32 mode. While for debugging purposes it might make
2130 // sense to run over all possible combinations, normally a subset of them
2131 // would be sufficient:
2132 // - All valid options to TrtTestMode (implicit, explicit, dynamic shape)
2133 // - DataType: is the TF data type of the input tensors. This usually only
2134 // influences the data type added by Converter::AddInputTensor. We test the
2135 // valid combinations of input data types in AddAndGetInputs, therefore
2136 // for most of the OpConverterTest its is sufficient to test for DT_FLOAT.
2137 // - TrtPrecisionMode: valid options are FP32, FP16 and INT8. This influences
2138 // how TRT handles the precision inside the TRT network, but should not matter
2139 // for the TF -> TRT conversion. Therefore it should be sufficient to test
2140 // for FP32.
2141 typedef ParameterizedOpConverterTestBase OpConverter_FP32_Test;
2142 // Base class for tests that need to be tested for both FP32 and FP16.
2143 typedef ParameterizedOpConverterTestBase OpConverter_FP32_FP16_Test;
2144 // Base class for Binary tests that need to be tested
2145 typedef OpConverter_BinaryTest<float> OpConverter_FP32_FP16_BinaryTest;
2146 typedef OpConverter_BinaryTest<int> OpConverter_BOOL_BinaryTest;
2147 // Base class for tests that need to be tested for FP32, FP16, and INT32
2148 typedef ParameterizedOpConverterTestBase OpConverter_FP32_FP16_INT32_Test;
2149 // Base class for tests that need to be tested for INT32
2150 typedef ParameterizedOpConverterTestBase OpConverter_INT32_Test;
2151 // Base class for Unary tests that need to be tested
2152 typedef OpConverter_UnaryTest<float> OpConverter_FP32_UnaryTest;
2153 typedef OpConverter_UnaryTest<int> OpConverter_BOOL_Test;
2154
2155 // Instantiate parameter combinations to OpConverter_<DT_X...>_Test
2156 INSTANTIATE_TEST_CASE_P(
2157 OpConvTestInstantiation, OpConverter_FP32_Test,
2158 ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
2159 ::testing::Values(DT_FLOAT),
2160 ::testing::Values(TrtPrecisionMode::FP32)));
2161
2162 INSTANTIATE_TEST_CASE_P(
2163 OpConvTestInstantiation, OpConverter_FP32_FP16_Test,
2164 ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
2165 ::testing::Values(DT_FLOAT, DT_HALF),
2166 ::testing::Values(TrtPrecisionMode::FP32)));
2167
2168 INSTANTIATE_TEST_CASE_P(
2169 OpConvTestInstantiation, OpConverter_FP32_FP16_INT32_Test,
2170 ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
2171 ::testing::Values(DT_FLOAT, DT_HALF, DT_INT32),
2172 ::testing::Values(TrtPrecisionMode::FP32)));
2173
2174 INSTANTIATE_TEST_CASE_P(
2175 OpConvTestInstantiation, OpConverter_INT32_Test,
2176 ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
2177 ::testing::Values(DT_INT32),
2178 ::testing::Values(TrtPrecisionMode::FP32)));
2179
2180 INSTANTIATE_TEST_CASE_P(
2181 OpConvTestInstantiation, OpConverter_FP32_UnaryTest,
2182 ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
2183 ::testing::Values(DT_FLOAT),
2184 ::testing::Values(TrtPrecisionMode::FP32)));
2185
2186 INSTANTIATE_TEST_CASE_P(
2187 OpConvTestInstantiation, OpConverter_BOOL_Test,
2188 ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
2189 ::testing::Values(DT_BOOL),
2190 ::testing::Values(TrtPrecisionMode::FP32)));
2191
2192 INSTANTIATE_TEST_CASE_P(
2193 OpConvTestInstantiation, OpConverter_FP32_FP16_BinaryTest,
2194 ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
2195 ::testing::Values(DT_FLOAT, DT_HALF),
2196 ::testing::Values(TrtPrecisionMode::FP32)));
2197
2198 INSTANTIATE_TEST_CASE_P(
2199 OpConvTestInstantiation, OpConverter_BOOL_BinaryTest,
2200 ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
2201 ::testing::Values(DT_BOOL),
2202 ::testing::Values(TrtPrecisionMode::FP32)));
2203
2204 template <typename T>
CopyTensorElements(const Tensor & tensor,protobuf::RepeatedField<T> * out)2205 void CopyTensorElements(const Tensor& tensor, protobuf::RepeatedField<T>* out) {
2206 out->Clear();
2207 if (tensor.NumElements() == 0) return;
2208
2209 // TensorProto does not need to have all the elements present and can truncate
2210 // trailing elements with the same value for compressed representation. Such
2211 // elements are derived based on the tensor shape.
2212 const auto flat = tensor.flat<T>();
2213 int64 last_index = 0;
2214 for (int64 i = 0; i < tensor.NumElements(); ++i) {
2215 if (flat(i) != flat(last_index)) {
2216 last_index = i;
2217 }
2218 }
2219
2220 int num_out_elements = last_index + 1;
2221 out->Reserve(num_out_elements);
2222 out->AddNAlreadyReserved(num_out_elements);
2223 const T* src = flat.data();
2224 T* dst = out->mutable_data();
2225 std::copy(src, src + num_out_elements, dst);
2226 }
2227
2228 template <DataType dtype, typename CType>
TestConvertVariableV2(VariableOpConverterTest * test)2229 void TestConvertVariableV2(VariableOpConverterTest* test) {
2230 struct TestParam {
2231 string container;
2232 string shared_name;
2233 std::vector<int> dims;
2234 float epsilon;
2235 Status conversion_status;
2236 };
2237
2238 std::vector<TestParam> test_param = {
2239 {"", "var0", {}, 0.001, Status::OK()},
2240 {"", "var0", {64}, 0.001, Status::OK()},
2241 {"", "var0", {8, 16}, 0.001, Status::OK()},
2242 {"box", "var", {8, 16}, 0.001, Status::OK()}};
2243 for (auto p : test_param) {
2244 // Create node definition.
2245 NodeDef node_def;
2246 std::vector<int64_t> dims_64(p.dims.begin(), p.dims.end());
2247 TensorShape shape = TensorShape(absl::Span<int64_t>(dims_64));
2248 TF_CHECK_OK(NodeDefBuilder("my_var", "VariableV2")
2249 .Attr("dtype", dtype)
2250 .Attr("shape", shape)
2251 .Attr("container", p.container)
2252 .Attr("shared_name", p.shared_name)
2253 .Finalize(&node_def));
2254
2255 OpKernel* kernel;
2256 OpKernelContext* context;
2257 test->CreateContext(node_def, &kernel, &context);
2258
2259 test->Reset(TrtPrecisionMode::FP32, TrtTestMode::kDynamicShape);
2260
2261 // Set the value of the variable according to p.dims.
2262 int var_size = std::accumulate(p.dims.begin(), p.dims.end(), 1,
2263 std::multiplies<int>());
2264 std::vector<CType> expected_value;
2265 expected_value.reserve(var_size);
2266 for (int i = 0; i < var_size; i++) {
2267 expected_value.push_back((CType)i);
2268 }
2269
2270 // To set the variable, we get the tensor by executing the VariableV2 op
2271 // rather than creating the resource directly in the manager, because:
2272 // 1) LegacyVar defined in `variable_ops.cc` is not accessible.
2273 // 2) Tensor::set_shape is private, VariableOp is a friend class.
2274 kernel->Compute(context);
2275 Tensor* tensor_ptr = context->mutable_output(0);
2276 CHECK_NOTNULL(tensor_ptr);
2277 // We allocate the tensor in the temporary memory. Note that creating a
2278 // tensor in this scope and sharing the underlying storage by copy would
2279 // lead to double destruction.
2280 AllocatorAttributes attr;
2281 attr.set_gpu_compatible(true);
2282 attr.set_nic_compatible(true);
2283 OP_REQUIRES_OK(context,
2284 context->allocate_temp(dtype, shape, tensor_ptr, attr));
2285 // The tensor is allocated on GPU. We copy the values from the CPU.
2286 auto tensor_flat = tensor_ptr->flat<CType>();
2287 CHECK_NOTNULL(tensor_flat.data());
2288 auto ret = cudaMemcpy(tensor_flat.data(), expected_value.data(),
2289 expected_value.size() * sizeof(CType),
2290 cudaMemcpyHostToDevice);
2291 CHECK_EQ(ret, 0);
2292
2293 test->RunValidationAndConversion(node_def);
2294 TRT_TensorOrWeights output;
2295 TF_EXPECT_OK(test->GetTensorOrWeights("my_var", &output));
2296 EXPECT_THAT(output.weights(),
2297 ShapedWeightsHasDimsAndValues<CType>(p.dims, expected_value));
2298 }
2299 }
2300
TEST_F(VariableOpConverterTest,ConvertVariableV2)2301 TEST_F(VariableOpConverterTest, ConvertVariableV2) {
2302 TestConvertVariableV2<DT_FLOAT, float>(this);
2303 TestConvertVariableV2<DT_HALF, Eigen::half>(this);
2304 }
2305
2306 template <DataType dtype, typename CType>
TestConvertReadVariableOp(VariableOpConverterTest * test)2307 void TestConvertReadVariableOp(VariableOpConverterTest* test) {
2308 struct TestParam {
2309 string container;
2310 string name;
2311 std::vector<int> dims;
2312 float epsilon;
2313 Status conversion_status;
2314 };
2315
2316 std::vector<TestParam> test_param = {
2317 {"", "var0", {}, 0.001, Status::OK()},
2318 {"", "var0", {64}, 0.001, Status::OK()},
2319 {"", "var0", {8, 16}, 0.001, Status::OK()},
2320 {"box", "var", {8, 16}, 0.001, Status::OK()}};
2321 for (auto p : test_param) {
2322 // Create node definition.
2323 NodeDefBuilder::NodeOut rvo_input =
2324 NodeDefBuilder::NodeOut("my_handle", 0, DT_RESOURCE);
2325 NodeDef node_def;
2326 std::vector<int64_t> dims_64(p.dims.begin(), p.dims.end());
2327 TensorShape shape =
2328 TensorShape(gtl::ArraySlice<int64_t>(dims_64)); // non-absl ok
2329 TF_CHECK_OK(NodeDefBuilder("my_var", "ReadVariableOp")
2330 .Attr("dtype", dtype)
2331 .Attr("_shape", shape)
2332 .Input(rvo_input)
2333 .Finalize(&node_def));
2334
2335 OpKernel* kernel;
2336 OpKernelContext* context;
2337 test->CreateContext(node_def, &kernel, &context);
2338
2339 test->Reset(TrtPrecisionMode::FP32, TrtTestMode::kDynamicShape);
2340
2341 // Set the value of the variable according to p.dims.
2342 int var_size = std::accumulate(p.dims.begin(), p.dims.end(), 1,
2343 std::multiplies<int>());
2344 std::vector<CType> expected_value;
2345 expected_value.reserve(var_size);
2346 for (int i = 0; i < var_size; i++) {
2347 // Set expected_value[i] = (cast)i.
2348 expected_value.push_back((CType)i);
2349 }
2350
2351 // Create a resource handle.
2352 DtypeAndPartialTensorShape dtype_and_shape;
2353 dtype_and_shape.dtype = dtype;
2354 TF_CHECK_OK(PartialTensorShape::BuildPartialTensorShape(
2355 gtl::ArraySlice<int64_t>(dims_64), // non-absl ok
2356 &dtype_and_shape.shape));
2357 ResourceHandle handle = MakeResourceHandle<Var>(
2358 context, p.container, p.name,
2359 std::vector<DtypeAndPartialTensorShape>{dtype_and_shape});
2360
2361 // Create input resource with the handle.
2362 test->AddTestResource("my_handle", handle);
2363
2364 // Create a resource with this handle.
2365 Var* resource = new Var(dtype);
2366 TF_EXPECT_OK(CreateResource(context, handle, resource));
2367
2368 // Setup the tensor of the variable.
2369 // We allocate the tensor in the temporary memory. Note that creating a
2370 // tensor in this scope and sharing the underlying storage by copy would
2371 // lead to double destruction.
2372 AllocatorAttributes attr_value;
2373 attr_value.set_gpu_compatible(true);
2374 attr_value.set_nic_compatible(true);
2375 TF_EXPECT_OK(
2376 context->allocate_temp(dtype, shape, resource->tensor(), attr_value));
2377 // The tensor is allocated on GPU. We copy the values from the CPU.
2378 auto tensor_flat = resource->tensor()->flat<CType>();
2379 CHECK(tensor_flat.data());
2380 auto ret = cudaMemcpy(tensor_flat.data(), expected_value.data(),
2381 expected_value.size() * sizeof(CType),
2382 cudaMemcpyHostToDevice);
2383 CHECK_EQ(ret, 0);
2384
2385 test->RunValidationAndConversion(node_def);
2386 TRT_TensorOrWeights output;
2387 TF_EXPECT_OK(test->GetTensorOrWeights("my_var", &output));
2388 EXPECT_THAT(output.weights(),
2389 ShapedWeightsHasDimsAndValues<CType>(p.dims, expected_value));
2390 }
2391 }
2392
TEST_F(VariableOpConverterTest,ConvertReadVariableOp)2393 TEST_F(VariableOpConverterTest, ConvertReadVariableOp) {
2394 TestConvertReadVariableOp<DT_FLOAT, float>(this);
2395 TestConvertReadVariableOp<DT_HALF, Eigen::half>(this);
2396 }
2397
2398 template <DataType dtype, typename InputCType, typename OutputCType>
TestConvertConst(OpConverterTest * test)2399 void TestConvertConst(OpConverterTest* test) {
2400 NodeDef node_def;
2401 node_def.set_name("my_const");
2402 node_def.set_op("Const");
2403
2404 auto reset_and_test = [&node_def, test](
2405 const Tensor& tensor, const bool as_tensor_content,
2406 const std::vector<int>& expected_dims,
2407 const std::vector<OutputCType>& expected_value) {
2408 test->Reset();
2409
2410 TensorProto* tensor_attr =
2411 (*node_def.mutable_attr())["value"].mutable_tensor();
2412 tensor_attr->Clear();
2413
2414 if (as_tensor_content) {
2415 tensor.AsProtoTensorContent(tensor_attr);
2416 } else {
2417 tensor.shape().AsProto(tensor_attr->mutable_tensor_shape());
2418 tensor_attr->set_dtype(tensor.dtype());
2419
2420 if (tensor.dtype() == DT_FLOAT) {
2421 CopyTensorElements<float>(tensor, tensor_attr->mutable_float_val());
2422 } else if (tensor.dtype() == DT_INT32) {
2423 CopyTensorElements<int32>(tensor, tensor_attr->mutable_int_val());
2424 } else {
2425 tensor.AsProtoField(tensor_attr);
2426 }
2427 }
2428 test->RunValidationAndConversion(node_def);
2429 TRT_TensorOrWeights output;
2430 TF_EXPECT_OK(test->GetTensorOrWeights("my_const", &output));
2431 EXPECT_THAT(output.weights(), ShapedWeightsHasDimsAndValues<OutputCType>(
2432 expected_dims, expected_value));
2433 };
2434
2435 auto& attr = *node_def.mutable_attr();
2436 attr["dtype"].set_type(dtype);
2437 {
2438 // By default empty tensor will pick DT_FLOAT as data type and we fix it
2439 // here.
2440 Tensor t(dtype); // Empty tensor.
2441 reset_and_test(t, false, {}, {});
2442 }
2443 {
2444 Tensor t = test::AsScalar<InputCType>(12);
2445 std::vector<int> expected_dims{1};
2446 // Scalars are represented as rank 0 tensors.
2447 expected_dims.clear();
2448 reset_and_test(t, false, expected_dims, {12});
2449 reset_and_test(t, true, expected_dims, {12});
2450 }
2451 {
2452 Tensor t = test->AsTensor<InputCType>({1, 2});
2453 reset_and_test(t, false, {2}, {1, 2});
2454 reset_and_test(t, true, {2}, {1, 2});
2455 }
2456 {
2457 Tensor t =
2458 test->AsTensor<InputCType>({1, 2, 3, 4, 5, 6}, TensorShape({2, 3}));
2459 reset_and_test(t, false, {2, 3}, {1, 2, 3, 4, 5, 6});
2460 reset_and_test(t, true, {2, 3}, {1, 2, 3, 4, 5, 6});
2461 }
2462 {
2463 // Set all tensor elements to the same value. Such tensors are encoded
2464 // using a single element list in tensor proto.
2465 Tensor t =
2466 test->AsTensor<InputCType>({1, 1, 1, 1, 1, 1}, TensorShape({2, 3}));
2467 reset_and_test(t, false, {2, 3}, {1, 1, 1, 1, 1, 1});
2468 reset_and_test(t, true, {2, 3}, {1, 1, 1, 1, 1, 1});
2469 }
2470 {
2471 // Set trailing tensor elements to the same value. Such tensors are
2472 // encoded by truncating all equal elements except the first one.
2473 Tensor t =
2474 test->AsTensor<InputCType>({2, 2, 1, 1, 1, 1}, TensorShape({2, 3}));
2475 reset_and_test(t, false, {2, 3}, {2, 2, 1, 1, 1, 1});
2476 reset_and_test(t, true, {2, 3}, {2, 2, 1, 1, 1, 1});
2477 }
2478 }
2479
TEST_F(OpConverterTest,ConvertConst)2480 TEST_F(OpConverterTest, ConvertConst) {
2481 {
2482 Reset();
2483 NodeDef node_def = MakeConstNodeDef<double>("my_const", {});
2484 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
2485 "Unsupported tensorflow data type double");
2486 }
2487 {
2488 Reset();
2489 Tensor tensor =
2490 AsTensor<int64_t>({1, std::numeric_limits<int64_t>::max(), 1, 1, 1,
2491 std::numeric_limits<int64_t>::lowest()},
2492 TensorShape({2, 3}));
2493 NodeDef node_def;
2494 node_def.set_name("my_const");
2495 node_def.set_op("Const");
2496 (*node_def.mutable_attr())["dtype"].set_type(DT_INT64);
2497 TensorProto* tensor_attr =
2498 (*node_def.mutable_attr())["value"].mutable_tensor();
2499 tensor_attr->Clear();
2500 tensor.AsProtoTensorContent(tensor_attr);
2501 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
2502 "outside the range of int32");
2503 }
2504
2505 TestConvertConst<DT_FLOAT, float, float>(this);
2506 TestConvertConst<DT_INT8, int8, int32>(this);
2507 TestConvertConst<DT_UINT8, uint8, int32>(this);
2508 TestConvertConst<DT_INT16, int16, int32>(this);
2509 TestConvertConst<DT_UINT16, uint16, int32>(this);
2510 TestConvertConst<DT_INT32, int32, int32>(this);
2511 TestConvertConst<DT_UINT32, uint32, int32>(this);
2512 TestConvertConst<DT_INT64, int64, int32>(this);
2513 TestConvertConst<DT_UINT64, uint64, int32>(this);
2514 }
2515
2516 template <typename T>
CreateFusedBatchNormOp(DataType tf_type,std::string data_format,bool is_training,float epsilon)2517 NodeDef CreateFusedBatchNormOp(DataType tf_type, std::string data_format,
2518 bool is_training, float epsilon) {
2519 Scope s = Scope::NewRootScope();
2520 auto x = ops::Placeholder(s.WithOpName("x"), tf_type);
2521 auto scale = ops::Placeholder(s.WithOpName("scale"), tf_type);
2522 auto offset = ops::Placeholder(s.WithOpName("offset"), tf_type);
2523 auto mean = ops::Placeholder(s.WithOpName("mean"), tf_type);
2524 auto variance = ops::Placeholder(s.WithOpName("variance"), tf_type);
2525 typename T::Attrs attrs;
2526 attrs.data_format_ = data_format;
2527 attrs.is_training_ = is_training;
2528 if (epsilon > 0) {
2529 attrs.epsilon_ = epsilon;
2530 } else {
2531 EXPECT_GE(epsilon, 0);
2532 }
2533 return T(s.WithOpName("my_batchnorm"), x, scale, offset, mean, variance,
2534 attrs)
2535 .operation.node()
2536 ->def();
2537 }
2538
TEST_P(OpConverter_FP32_Test,ConvertFusedBatchNorm)2539 TEST_P(OpConverter_FP32_Test, ConvertFusedBatchNorm) {
2540 using OpFunc = std::function<NodeDef(DataType, std::string, bool, float)>;
2541 std::vector<OpFunc> get_node_def_vec{
2542 CreateFusedBatchNormOp<ops::FusedBatchNorm>,
2543 CreateFusedBatchNormOp<ops::FusedBatchNormV2>,
2544 CreateFusedBatchNormOp<ops::FusedBatchNormV3>};
2545
2546 struct TestParam {
2547 std::string data_format;
2548 int tensor_input_idx; // Index of an input that will be provided as tensor.
2549 bool is_training;
2550 float epsilon;
2551 Status conversion_status;
2552 bool keep_channel_unknown;
2553 };
2554
2555 struct NodeInput {
2556 std::string name;
2557 std::vector<int> dims;
2558 std::vector<float> val;
2559 };
2560 std::vector<NodeInput> node_input_nchw{
2561 {"x", {2, 3, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}},
2562 {"scale", {3}, {7, 8, 9}},
2563 {"offset", {3}, {10, 20, 30}},
2564 {"mean", {3}, {1, 2, 3}},
2565 {"variance", {3}, {4, 5, 6}}};
2566
2567 std::vector<NodeInput> node_input_nhwc{
2568 {"x", {2, 2, 1, 3}, {1, 3, 5, 2, 4, 6, 7, 9, 11, 8, 10, 12}},
2569 {"scale", {3}, {7, 8, 9}},
2570 {"offset", {3}, {10, 20, 30}},
2571 {"mean", {3}, {1, 2, 3}},
2572 {"variance", {3}, {4, 5, 6}}};
2573
2574 std::vector<float> expected_output_nchw{
2575 10.0, 13.495633, 23.574135, 27.148273, 37.342354, 41.013527,
2576 30.9738, 34.469433, 45.018955, 48.59309, 59.369415, 63.04059};
2577
2578 std::vector<float> expected_output_nhwc{
2579 10.0, 23.574135, 37.342354, 13.495633, 27.148273, 41.013527,
2580 30.9738, 45.018955, 59.369415, 34.469433, 48.59309, 63.04059};
2581
2582 for (auto get_node_def : get_node_def_vec) {
2583 NodeDef tmp_node_def = get_node_def(tf_type_, "NCHW", true, 0);
2584 std::string op_name = tmp_node_def.op();
2585 std::vector<TestParam> test_param{
2586 {"NCHW", 0, true, 0,
2587 errors::Unimplemented(
2588 StrCat(op_name, " only supports is_training=false"))},
2589 {"NCHW", 1, false, 0,
2590 errors::Unimplemented(StrCat("The input \"scale\" for ", op_name,
2591 " must be a constant"))},
2592 {"NCHW", 2, false, 0,
2593 errors::Unimplemented(StrCat("The input \"offset\" for ", op_name,
2594 " must be a constant"))},
2595 {"NCHW", 3, false, 0,
2596 errors::Unimplemented(StrCat("The input \"mean\" for ", op_name,
2597 " must be a constant"))},
2598 {"NCHW", 4, false, 0,
2599 errors::Unimplemented(StrCat("The input \"variance\" for ", op_name,
2600 " must be a constant"))},
2601 {"NCHW", 0, false, 0.01},
2602 {"NHWC", 0, false, 0.01}};
2603 if (trt_mode_ == TrtTestMode::kDynamicShape) {
2604 test_param.push_back(
2605 {"NCHW", 0, false, 0.01,
2606 errors::InvalidArgument("Channel dimension must be static"), true});
2607 test_param.push_back(
2608 {"NHWC", 0, false, 0.01,
2609 errors::InvalidArgument("Channel dimension must be static"), true});
2610 }
2611 for (auto p : test_param) {
2612 Reset();
2613 NodeDef node_def =
2614 get_node_def(tf_type_, p.data_format, p.is_training, p.epsilon);
2615 std::vector<NodeInput> node_input =
2616 p.data_format == "NCHW" ? node_input_nchw : node_input_nhwc;
2617 std::vector<float> expected_output =
2618 p.data_format == "NCHW" ? expected_output_nchw : expected_output_nhwc;
2619 for (int i = 0; i < node_input.size(); i++) {
2620 if (i == 0 || i == p.tensor_input_idx) {
2621 // The first input (x) is always added as a tensor, and it has shape
2622 // NCHW/NHWC. The other inputs are per channel values (1D, size C).
2623 //
2624 // In implicit batch mode, it is not possible to add any of the 1D
2625 // inputs as a tensor: the first dim is always treated as batch dim in
2626 // implicit batch mode, and that has to agree for all tensors. We have
2627 // two input tensors with shapes NCHW and C and in general N != C.
2628 // The converter already picked up N from the fist input, and reports
2629 // an error when we try to add any other tensors with not matching
2630 // first dim.
2631 //
2632 // This restriction does not apply in explicit batch mode: the tensors
2633 // can have different first dim. The converter still expects that only
2634 // the first arg is a tensor. TODO(tfeher) Check if one can relax this
2635 // restriction.
2636 Status expected_status =
2637 (i != 0 && trt_mode_ == TrtTestMode::kImplicitBatch)
2638 ? errors::InvalidArgument(
2639 StrCat("Batch size doesn't match for tensor ",
2640 node_input[i].name,
2641 ": Provided batch size does not match "
2642 "converter batch size: 3 vs 2"))
2643 : Status::OK();
2644 std::vector<int> partial_input_shape;
2645 if (i == 0 && trt_mode_ == TrtTestMode::kDynamicShape &&
2646 !p.keep_channel_unknown) {
2647 // keep channel dim static (known)
2648 partial_input_shape.resize(4, -1);
2649 int channel_dim = (p.data_format == "NCHW" ? 1 : 3);
2650 partial_input_shape[channel_dim] = node_input[i].dims[channel_dim];
2651 }
2652 AddTestTensor(node_input[i].name, node_input[i].dims, tf_type_,
2653 node_input[i].val, partial_input_shape,
2654 expected_status);
2655
2656 } else {
2657 AddTestWeights(node_input[i].name, node_input[i].dims,
2658 node_input[i].val, tf_type_);
2659 }
2660 }
2661 TestOpConverter("my_batchnorm", node_def, node_input[0].dims,
2662 p.conversion_status, Status::OK(),
2663 ArrayFloatNear(expected_output));
2664 }
2665 }
2666 }
2667
TEST_P(OpConverter_FP32_Test,ConvertTranspose)2668 TEST_P(OpConverter_FP32_Test, ConvertTranspose) {
2669 // Get the NodeDef for Transpose.
2670 Scope s = Scope::NewRootScope();
2671 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
2672 auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
2673 auto transpose = ops::Transpose(s.WithOpName("my_transpose"), input, weights);
2674 const NodeDef& node_def = transpose.operation.node()->def();
2675
2676 std::vector<TestParamBase> test_params = {
2677 // For the first test we leave param empty. This signals to use a
2678 // input as weight which will be invalid
2679 TestParamBase{{3, 1, 2, 1},
2680 {},
2681 {},
2682 {},
2683 Status(error::UNIMPLEMENTED,
2684 "The input \"perm\" for Transpose must be a "
2685 "constant")},
2686 TestParamBase{{1, 1, 2, 3},
2687 {},
2688 {},
2689 {0, 1, 2},
2690 Status(error::INVALID_ARGUMENT,
2691 "Rank of perm for transpose does not match with "
2692 "that of the input.")},
2693 // Transpose batch dim
2694 TestParamBase{
2695 {1, 1, 2, 3},
2696 {},
2697 {3, 2, 1, 1},
2698 {3, 2, 1, 0},
2699 (trt_mode_ == TrtTestMode::kImplicitBatch)
2700 ? Status(error::UNIMPLEMENTED,
2701 "Transpose at batch dimension is not supported")
2702 : Status::OK()},
2703 TestParamBase{{1, 1, 2, 3}, {}, {1, 3, 1, 2}, {0, 3, 1, 2}},
2704 };
2705 if (trt_mode_ == TrtTestMode::kDynamicShape) {
2706 // Dynamic shape tests where some shapes are known
2707 test_params.push_back(TestParamBase{
2708 {1, 1, 2, 3}, {-1, 1, 2, -1}, {1, 3, 1, 2}, {0, 3, 1, 2}});
2709 }
2710 std::vector<float> expected_values{1, 4, 2, 5, 3, 6};
2711 for (auto p : test_params) {
2712 SCOPED_TRACE(p);
2713 Reset();
2714 AddTestTensor("input", p.input_dims, {1, 2, 3, 4, 5, 6},
2715 p.partial_input_dims);
2716 if (p.param.empty()) {
2717 AddTestTensor("weights", {3});
2718 } else {
2719 AddTestWeights<int32>("weights", {static_cast<int>(p.param.size())},
2720 p.param);
2721 }
2722 TestOpConverter("my_transpose", node_def, p.expected_output_dims, p.status,
2723 p.runtime_status, ElementsAreArray(expected_values));
2724 }
2725 }
2726
TEST_P(OpConverter_FP32_Test,ConvertTile)2727 TEST_P(OpConverter_FP32_Test, ConvertTile) {
2728 Scope s = Scope::NewRootScope();
2729 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
2730 auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
2731 auto tile = ops::Tile(s.WithOpName("my_tile"), input, weights);
2732 const NodeDef& node_def = tile.operation.node()->def();
2733
2734 struct TileParam {
2735 std::vector<int> input_dims;
2736 std::vector<int> multiplier;
2737 std::vector<float> tensor;
2738 // Concrete (static) output dimensions, including batch size as first dim.
2739 std::vector<int> expected_output_dims;
2740 std::vector<int> expected_results;
2741 int test_ID;
2742 // Expected status of conversion (with concrete error message).
2743 Status status;
2744 };
2745
2746 std::vector<TileParam> test_params = {
2747 // Tests to be rejected by ConvertTile::Validate() for any trt_mode_.
2748 TileParam{{1, 2, 3}, // input_dims
2749 {1, -2, 1}, // multiplier
2750 {}, // tensor
2751 {}, // expected_output_dims
2752 {}, // expected_results
2753 1, // test_ID
2754 Status(error::INVALID_ARGUMENT,
2755 "All replications of the Tile operation in "
2756 "'my_tile' should be positive, got (1, -2, 1).")},
2757 TileParam{{1, 2, 3}, // input_dims
2758 {1, 2, 1, 3}, // multiplier
2759 {0, 1, 2, 3, 4, 5}, // tensor
2760 {}, // expected_output_dims
2761 {}, // expected_results
2762 2, // test_ID
2763 Status(error::INVALID_ARGUMENT,
2764 "The length of the replication vector (4) of the "
2765 "Tile operation in 'my_tile' is expected to be equal "
2766 "to the rank of the input vector (3).")},
2767 // Tests passed ConvertTile::Validate() for at least some trt_mode_.
2768 TileParam{{1, 2}, // input_dims
2769 {1, 3}, // multiplier
2770 {2, 3}, // tensor
2771 {1, 6}, // expected_output_dims
2772 {2, 3, 2, 3, 2, 3}}, // out values
2773 TileParam{{1, 2, 3}, // input_dims
2774 {1, 2, 1}, // multiplier
2775 {0, 1, 2, 3, 4, 5}, // tensor
2776 {1, 4, 3}, // output dims
2777 {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5}}, // expected_results
2778 TileParam{{1, 2, 3}, // input_dims
2779 {1, 1, 2}, // multiplier
2780 {0, 1, 2, 3, 4, 5}, // tensor
2781 {1, 2, 6}, // expected_output_dims
2782 {0, 1, 2, 0, 1, 2, 3, 4, 5, 3, 4, 5}}, // expected_results
2783 TileParam{{1, 2, 3}, // input_dims
2784 {1, 2, 2}, // multiplier
2785 {0, 1, 2, 3, 4, 5}, // tensor
2786 {1, 4, 6}, // expected_output_dims
2787 {0, 1, 2, 0, 1, 2, 3, 4, 5, 3, 4, 5,
2788 0, 1, 2, 0, 1, 2, 3, 4, 5, 3, 4, 5}}, // expected_results
2789 // Tests with non trivial batch size multiplier.
2790 TileParam{{1, 2}, // input_dims
2791 {2, 3}, // multiplier
2792 {2, 3}, // tensor
2793 {2, 6}, // expected_output_dims
2794 {2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3}}, // out values
2795 TileParam{{1, 2, 3}, // input_dims
2796 {2, 2, 1}, // multiplier
2797 {0, 1, 2, 3, 4, 5}, // tensor
2798 {2, 4, 3}, // output dims
2799 {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
2800 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5}}, // expected_results
2801 };
2802
2803 for (bool multiplier_is_tensor : {true, false}) {
2804 for (bool input_is_tensor : {true, false}) {
2805 for (auto p : test_params) {
2806 std::vector<int> num_mults = {static_cast<int>(p.multiplier.size())};
2807 std::vector<int> partial_input_dims = {};
2808 if (multiplier_is_tensor) {
2809 if (trt_mode_ == TrtTestMode::kImplicitBatch) {
2810 p.status =
2811 Status(error::INVALID_ARGUMENT,
2812 "Conversion for Tile is not implemented for multipliers "
2813 "passed as a tensor in implicit batch mode");
2814 num_mults = {1, static_cast<int>(p.multiplier.size())};
2815 } else {
2816 if (p.test_ID == 1) {
2817 // Skip this test because in that situation it is impossible
2818 // to do a valid check for negative multipliers.
2819 continue;
2820 }
2821
2822 if (trt_mode_ == TrtTestMode::kDynamicShape) {
2823 partial_input_dims = num_mults;
2824 p.status = Status::OK();
2825 }
2826
2827 if (p.test_ID == 2) {
2828 p.status = Status(error::INVALID_ARGUMENT,
2829 "When replications are defined as a tensor, "
2830 "the number of its elements (4) must be equal "
2831 "to the rank of the input tensor (3).");
2832 }
2833 }
2834 } else {
2835 if (trt_mode_ == TrtTestMode::kImplicitBatch && p.multiplier[0] > 1) {
2836 p.status =
2837 Status(error::UNIMPLEMENTED,
2838 "The Tile operation along "
2839 "the batch dimension in 'my_tile' is not implemented.");
2840 }
2841 }
2842
2843 Reset();
2844 if (input_is_tensor) {
2845 AddTestTensor("input", p.input_dims, p.tensor);
2846 } else {
2847 AddTestWeights("input", p.input_dims, p.tensor, tf_type_);
2848 }
2849
2850 if (multiplier_is_tensor) {
2851 AddTestTensor<int>("weights", num_mults, DT_INT32, p.multiplier,
2852 partial_input_dims);
2853 } else {
2854 AddTestWeights<int32>("weights", num_mults, p.multiplier);
2855 }
2856
2857 TestOpConverter("my_tile", node_def, p.expected_output_dims, p.status,
2858 Status::OK(), ElementsAreArray(p.expected_results));
2859 }
2860 }
2861 }
2862 }
2863
TEST_P(OpConverter_FP32_Test,ConvertReshape)2864 TEST_P(OpConverter_FP32_Test, ConvertReshape) {
2865 // Get the NodeDef for Reshape.
2866 Scope s = Scope::NewRootScope();
2867 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
2868 auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
2869 auto reshape = ops::Reshape(s.WithOpName("my_reshape"), input, weights);
2870 const NodeDef& node_def = reshape.operation.node()->def();
2871
2872 if (trt_mode_ == TrtTestMode::kImplicitBatch) {
2873 // Shape is a tensor, should fail in implicit batch mode.
2874 Reset();
2875 AddTestTensor("input", {3, 2, 1});
2876 AddTestTensor("weights", {3});
2877 RunValidationAndConversion(
2878 node_def, error::INVALID_ARGUMENT,
2879 "The input \"shape\" for Reshape must be a constant in implicit batch "
2880 "mode");
2881 } else if (!IS_TRT_VERSION_GE(7, 1, 3, 0)) {
2882 // Shape is a tensor, should fail before TRT 7.1.3 even in explicit batch /
2883 // dynamic shape mode.
2884 Reset();
2885 AddTestTensor("input", {3, 2, 1});
2886 AddTestTensor("weights", {3});
2887 RunValidationAndConversion(
2888 node_def, error::INVALID_ARGUMENT,
2889 "Non constant shape input tensor for Reshape requires minimum TRT "
2890 "7.1.3");
2891 }
2892
2893 Status reshape_from_scalar_status =
2894 trt_mode_ == TrtTestMode::kImplicitBatch
2895 ? errors::Internal(
2896 "Failed to convert at least one input to a TRT_TensorOrWeights:"
2897 " Scalar input tensor is not supported since the first "
2898 "dimension is treated as batch dimension by TRT")
2899 : Status::OK();
2900 Status add_scalar_tensor_status =
2901 trt_mode_ == TrtTestMode::kImplicitBatch
2902 ? errors::InvalidArgument(
2903 "removing first dim requires explicit batch dimension")
2904 : Status::OK();
2905 Status reshape_to_scalar_status =
2906 trt_mode_ == TrtTestMode::kImplicitBatch
2907 ? errors::Unimplemented("Reshape to shape=[] is not supported")
2908 : Status::OK();
2909 Status reshape_batch_status =
2910 trt_mode_ == TrtTestMode::kImplicitBatch
2911 ? errors::Unimplemented("Reshape on batch dimension is not supported")
2912 : Status::OK();
2913
2914 struct TestParams {
2915 std::vector<int> tensor_dims;
2916 std::vector<int> shape;
2917 std::vector<int> expected_shape;
2918 Status conversion_status;
2919 Status runtime_status;
2920 std::vector<int> shape_prof; // needed concrete values if shape == -1.
2921 Status add_test_tensor_status;
2922 };
2923
2924 std::vector<TestParams> params = {
2925 // Reshape scalar to tensor, should fail in implicit batch mode.
2926 TestParams{{},
2927 {1, 1},
2928 {},
2929 reshape_from_scalar_status,
2930 {},
2931 {},
2932 add_scalar_tensor_status},
2933 // Reshape tensor to scalar, should fail in implicit batch mode.
2934 // - In explicit batch mode if shape is set as weight it works.
2935 // - In explicit batch mode && using shape as tensor input it should
2936 // fail. In that case we set the expected conversion status in the
2937 // test loop.
2938 TestParams{{1, 1}, {}, {}, reshape_to_scalar_status},
2939 // Reshape at batch dimension, should fail in implicit batch mode.
2940 TestParams{{1, 1, 2, 3}, {3, 1, 1, 2}, {}, reshape_batch_status},
2941 TestParams{{2, 1, 2, 3}, {-1, 1, 4}, {3, 1, 4}, reshape_batch_status},
2942 // Tests that should succeed in every trt_mode.
2943 TestParams{{1, 1, 2, 3}, {-1, 1, 3, 2}, {1, 1, 3, 2}},
2944 TestParams{{1, 1, 2, 3}, {1, 1, -1}, {1, 1, 6}},
2945 TestParams{{1, 1, 2, 3}, {1, 1, 3, 2}},
2946 TestParams{{2, 1, 2, 3}, {2, 1, 3, 2}},
2947 TestParams{{1, 1, 1}, {1}},
2948 TestParams{{1}, {1, 1}},
2949 TestParams{{2, 1, 1}, {2}},
2950 TestParams{{2}, {2, 1}},
2951 };
2952 if (trt_mode_ == TrtTestMode::kImplicitBatch) {
2953 // Reshape tensor with zero rank using an empty shape tensor, should fail in
2954 // implicit batch mode. In explicit batch mode this is an identity operation
2955 // and does not add a reshape layer therefore we do not test it.
2956 params.push_back(TestParams{{},
2957 {},
2958 {},
2959 reshape_from_scalar_status,
2960 {},
2961 {},
2962 add_scalar_tensor_status});
2963 }
2964 // Testing the methods for representing the reshape shape for IShuffleLayer:
2965 // as a weight (true) or as a tensor (false).
2966 std::vector<bool> shape_input_options(1, true);
2967
2968 if (trt_mode_ != TrtTestMode::kImplicitBatch &&
2969 IS_TRT_VERSION_GE(7, 1, 3, 0)) {
2970 shape_input_options.push_back(false);
2971 }
2972
2973 for (auto p : params) {
2974 for (auto shape_as_weight : shape_input_options) {
2975 std::ostringstream oss;
2976 oss << "shape " << PrintToString(p.shape);
2977 SCOPED_TRACE(StrCat(oss.str(), shape_as_weight ? " weight" : " tensor"));
2978 if (!shape_as_weight && p.shape.empty()) {
2979 p.conversion_status = errors::Unimplemented(
2980 "Reshape with dynamic input requires 1D input tensor");
2981 }
2982 Reset();
2983 const int n_elements =
2984 std::accumulate(p.tensor_dims.begin(), p.tensor_dims.end(), 1,
2985 std::multiplies<int>());
2986 std::vector<float> input_vec(n_elements);
2987 std::iota(input_vec.begin(), input_vec.end(), 1);
2988 AddTestTensor("input", p.tensor_dims, tf_type_, input_vec, {},
2989 p.add_test_tensor_status);
2990 if (shape_as_weight) {
2991 AddTestWeights<int32>("weights", {static_cast<int>(p.shape.size())},
2992 p.shape);
2993 } else {
2994 std::vector<int32> dims;
2995 std::vector<int32> values{p.shape};
2996 if (!p.shape.empty()) {
2997 dims.push_back(p.shape.size());
2998 } else {
2999 // If the shape is empty we use a dummy value to ensure that
3000 // AddTestTensor creates the corresponding entry in InputOutputData.
3001 values.push_back(1);
3002 }
3003 AddTestTensor("weights", dims, DT_INT32, values, dims);
3004 }
3005 std::vector<int> expected_shape =
3006 p.expected_shape.empty() ? p.shape : p.expected_shape;
3007 VLOG(2) << "Calling TestOpConverter";
3008 TestOpConverter("my_reshape", node_def, expected_shape,
3009 p.conversion_status, p.runtime_status,
3010 ElementsAreArray(input_vec));
3011 }
3012 }
3013 }
3014
TEST_P(OpConverter_FP32_Test,ConvertShape)3015 TEST_P(OpConverter_FP32_Test, ConvertShape) {
3016 // Get the NodeDef for Shape op.
3017 Scope s = Scope::NewRootScope();
3018 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
3019 auto shape = ops::Shape(s.WithOpName("my_shape"), input);
3020 const NodeDef& node_def = shape.operation.node()->def();
3021
3022 Status conversion_status =
3023 (trt_mode_ == TrtTestMode::kImplicitBatch)
3024 ? errors::Unimplemented(
3025 "Shape is only supported for explicit batch mode.")
3026 : Status::OK();
3027 std::vector<TestParamBase> test_params = {
3028 // TODO(b/166274212): Enable the test parameter for TensorRT 7.1.3.
3029 #if !IS_TRT_VERSION_GE(7, 1, 3, 0)
3030 TestParamBase{{1, 2, 3}, {}, {3}, {}, conversion_status},
3031 #endif
3032 // Add input as weight (we use non empty param ({1}) to trigger this).
3033 TestParamBase{{1, 2, 3}, {}, {3}, {1}, conversion_status},
3034 };
3035
3036 auto input_is_weight = [](const TestParamBase p) { return !p.param.empty(); };
3037 for (auto p : test_params) {
3038 SCOPED_TRACE(p);
3039 Reset();
3040 // The number of elements of the input tensor. We leave it 0 in case we do
3041 // not need to add an input tensor. This happens in explicit batch mode: the
3042 // shape is known at conversion time and therefore the shape is added to the
3043 // network as a constant layer. In this case the single node network that
3044 // we use for the unit test have no actual input tensor when it is converted
3045 // to a TensorRT network.
3046 int n_elements = 0;
3047 if (input_is_weight(p) || trt_mode_ != TrtTestMode::kExplicitBatch) {
3048 // Calculate the number of elements for adding input data.
3049 n_elements = std::accumulate(p.input_dims.begin(), p.input_dims.end(), 1,
3050 std::multiplies<int>());
3051 }
3052 std::vector<float> input_val(n_elements, 1);
3053 if (!input_is_weight(p)) {
3054 AddTestTensor("input", p.input_dims, input_val);
3055 } else {
3056 AddTestWeights("input", p.input_dims, input_val, tf_type_);
3057 }
3058 TestOpConverter("my_shape", node_def, p.expected_output_dims, p.status,
3059 p.runtime_status, ElementsAreArray(p.input_dims),
3060 {DT_INT32});
3061 }
3062 }
3063
3064 struct MatMulTestParams {
3065 std::vector<int> shape_a;
3066 std::vector<int> values_a;
3067 bool transpose_a;
3068 std::vector<int> shape_b;
3069 std::vector<int> values_b;
3070 bool transpose_b;
3071 std::vector<int> expected_shape;
3072 std::vector<int> expected_output;
3073 };
3074
3075 // Helper function for testing MatMul and BatchMatMul. get_matmul is a function
3076 // used to generate the node. It accepts (DataType, transpose_a, transpose_b) as
3077 // parameters.
TestMatMulHelper(ParameterizedOpConverterTestBase * test,const std::function<NodeDef (DataType,bool,bool)> & get_matmul,const std::vector<MatMulTestParams> & params)3078 void TestMatMulHelper(
3079 ParameterizedOpConverterTestBase* test,
3080 const std::function<NodeDef(DataType, bool, bool)>& get_matmul,
3081 const std::vector<MatMulTestParams>& params) {
3082 {
3083 // Unsupported data type.
3084 test->Reset();
3085 NodeDef node_def = get_matmul(DT_INT32, false, false);
3086 test->AddTestTensor("input", {1, 2}, DT_INT32, {});
3087 test->AddTestWeights<int32>("weights", {2, 1}, {3, 5});
3088 test->RunValidationAndConversion(
3089 node_def, error::UNIMPLEMENTED,
3090 StrCat("Data type int32 is not supported for ", node_def.op(),
3091 ", must be one of [float, half]"));
3092 }
3093
3094 // FC conversion depends on whether the last dim of A is known or not. In
3095 // Dynamic shape mode, we will check whether A is handled correctly if it has
3096 // a partially known input shape (last dim known).
3097 std::vector<bool> a_test_partial_shape_values{false};
3098 if (test->get_trt_mode() == TrtTestMode::kDynamicShape) {
3099 a_test_partial_shape_values.push_back(true);
3100 }
3101
3102 for (auto p : params) {
3103 for (bool a_is_tensor : {true, false}) {
3104 for (bool b_is_tensor : {true, false}) {
3105 for (bool a_partial_shape : a_test_partial_shape_values) {
3106 if (a_partial_shape && !a_is_tensor) {
3107 // Only tensors can have partial shape.
3108 continue;
3109 }
3110 if (!a_is_tensor && !b_is_tensor) {
3111 // Skip test when both args are weights. We do not convert this
3112 // since const folding eliminates this case.
3113 continue;
3114 }
3115 SCOPED_TRACE(StrCat("A", p.transpose_a ? ".T" : "", " is ",
3116 a_is_tensor ? "tensor" : "weight", ", B",
3117 p.transpose_b ? ".T" : "", " is ",
3118 b_is_tensor ? "tensor " : "weight, rank A ",
3119 p.shape_a.size(), ", rank B ", p.shape_b.size()));
3120 test->Reset();
3121
3122 NodeDef node_def =
3123 get_matmul(test->get_tf_type(), p.transpose_a, p.transpose_b);
3124 const bool is_batch_matmul = node_def.op() == "BatchMatMul";
3125
3126 if (a_is_tensor) {
3127 if (a_partial_shape) {
3128 // Prepare a partial shape for A where only the last dim is known.
3129 std::vector<int> partial_shape(p.shape_a.size(), -1);
3130 int k = p.shape_a.size() - 1;
3131 partial_shape.at(k) = p.shape_a.at(k);
3132 test->AddTestTensor("input", p.shape_a, test->get_tf_type(),
3133 p.values_a, partial_shape);
3134 } else {
3135 test->AddTestTensor("input", p.shape_a, p.values_a);
3136 }
3137 } else {
3138 test->AddTestWeights("input", p.shape_a, p.values_a,
3139 test->get_tf_type());
3140 }
3141 if (b_is_tensor) {
3142 if (a_is_tensor && p.shape_a[0] != p.shape_b[0] &&
3143 test->get_trt_mode() == TrtTestMode::kImplicitBatch) {
3144 VLOG(2) << "Skipping test with inpcompatible batch dimensions";
3145 continue;
3146 }
3147 test->AddTestTensor("weights", p.shape_b, p.values_b);
3148 } else {
3149 test->AddTestWeights("weights", p.shape_b, p.values_b,
3150 test->get_tf_type());
3151 }
3152
3153 Status conversion_status = Status::OK();
3154 if (test->get_trt_mode() == TrtTestMode::kImplicitBatch) {
3155 // Implicit batch mode has several restriction. We change conversion
3156 // status accordingly.
3157 if (is_batch_matmul) {
3158 if (a_is_tensor && p.shape_a.size() < p.shape_b.size()) {
3159 conversion_status = errors::InvalidArgument(
3160 "Broadcasting beyond batch dimension is not supported "
3161 "(tensor #dims ",
3162 p.shape_a.size(), " vs broadcast #dims ", p.shape_b.size(),
3163 ")");
3164 }
3165 if (b_is_tensor && p.shape_b.size() < p.shape_a.size()) {
3166 conversion_status = errors::InvalidArgument(
3167 "Broadcasting beyond batch dimension is not supported "
3168 "(tensor #dims ",
3169 p.shape_b.size(), " vs broadcast #dims ", p.shape_a.size(),
3170 ")");
3171 }
3172 if ((!a_is_tensor || !b_is_tensor) && p.shape_a[0] != 1) {
3173 conversion_status = errors::Unimplemented(
3174 "TensorRT does not support batched constants in implicit "
3175 "batch mode.");
3176 }
3177 } else if ((a_is_tensor && p.shape_a.size() <= 2 &&
3178 (p.transpose_a || b_is_tensor)) ||
3179 (b_is_tensor && p.shape_b.size() <= 2)) {
3180 conversion_status = errors::InvalidArgument(
3181 "MatMul with 2D tensors requires explicit batch mode, or that"
3182 " tensor A is not transposed and B is a constant tensor.");
3183 }
3184 }
3185
3186 test->TestOpConverter("my_matmul", node_def, p.expected_shape,
3187 conversion_status, Status::OK(),
3188 ElementsAreArray(p.expected_output));
3189 if (!conversion_status.ok()) {
3190 VLOG(2) << "Converted with status " << conversion_status;
3191 }
3192 VLOG(2) << "== Finished test iteration ==";
3193 }
3194 }
3195 }
3196 }
3197 }
3198
3199 template <typename LayerType>
CheckAddedLayers(OpConverterTest * test,bool expect_found)3200 void CheckAddedLayers(OpConverterTest* test, bool expect_found) {
3201 bool layer_found = false;
3202 for (int i = 0; i < test->converter_->network()->getNbLayers(); i++) {
3203 nvinfer1::ILayer* layer = test->converter_->network()->getLayer(i);
3204 if (dynamic_cast<LayerType*>(layer)) {
3205 layer_found = true;
3206 }
3207 }
3208 EXPECT_EQ(expect_found, layer_found);
3209 }
3210
GetMatMulTestParams()3211 std::vector<MatMulTestParams> GetMatMulTestParams() {
3212 std::vector<MatMulTestParams> params{
3213 // clang-format off
3214 MatMulTestParams{{2, 2}, {0, 1, 2, 3}, false, // A (shape, val, T?)
3215 {2, 2}, {0, 1, 2, 3}, false, // B (shape, val, T?)
3216 {2, 2}, {2, 3, 6, 11}}, // result (shape, val)
3217 MatMulTestParams{{2, 2}, {0, 1, 2, 3}, false,
3218 {2, 2}, {0, 1, 2, 3}, true,
3219 {2, 2}, {1, 3, 3, 13}},
3220 MatMulTestParams{{2, 2}, {0, 1, 2, 3}, true,
3221 {2, 2}, {0, 1, 2, 3}, false,
3222 {2, 2}, {4, 6, 6, 10}},
3223 MatMulTestParams{{2, 2}, {0, 1, 2, 3}, true,
3224 {2, 2}, {0, 1, 2, 3}, true,
3225 {2, 2}, {2, 6, 3, 11}},
3226 MatMulTestParams{{2, 3}, {0, 1, 2, 3, 4, 5}, false,
3227 {2, 3}, {1, 2, 3, 4, 5, 6}, true,
3228 {2, 2}, {8, 17, 26, 62}},
3229 MatMulTestParams{{2, 3}, {0, 1, 2, 3, 4, 5}, true,
3230 {2, 3}, {1, 2, 3, 4, 5, 6}, false,
3231 {3, 3}, {12, 15, 18, 17, 22, 27, 22, 29, 36}},
3232 MatMulTestParams{{3, 2}, {0, 1, 2, 3, 4, 5}, false,
3233 {2, 3}, {1, 2, 3, 4, 5, 6}, false,
3234 {3, 3}, {4, 5, 6, 14, 19, 24, 24, 33, 42}},
3235 MatMulTestParams{{3, 2}, {0, 1, 2, 3, 4, 5}, true,
3236 {2, 3}, {1, 2, 3, 4, 5, 6}, true,
3237 {2, 2}, {16, 34, 22, 49}},
3238 // clang-format on
3239 };
3240 return params;
3241 }
3242
TEST_P(OpConverter_FP32_Test,ConvertMatMul)3243 TEST_P(OpConverter_FP32_Test, ConvertMatMul) {
3244 // Get the NodeDef for MatMul.
3245 auto get_matmul_nodedef = [](DataType dtype, bool transpose_a,
3246 bool transpose_b) -> NodeDef {
3247 Scope s = Scope::NewRootScope();
3248 auto input = ops::Placeholder(s.WithOpName("input"), dtype);
3249 auto weights = ops::Placeholder(s.WithOpName("weights"), dtype);
3250 const auto matmul_attrs =
3251 ops::MatMul::TransposeA(transpose_a).TransposeB(transpose_b);
3252 auto matmul =
3253 ops::MatMul(s.WithOpName("my_matmul"), input, weights, matmul_attrs);
3254 return matmul.operation.node()->def();
3255 };
3256
3257 TestMatMulHelper(this, get_matmul_nodedef, GetMatMulTestParams());
3258 }
3259
TEST_P(OpConverter_FP32_Test,ConvertBatchMatMul)3260 TEST_P(OpConverter_FP32_Test, ConvertBatchMatMul) {
3261 // Get the NodeDef for BatchMatMul.
3262 auto get_batch_matmul_nodedef = [](DataType dtype, bool transpose_a,
3263 bool transpose_b) -> NodeDef {
3264 Scope s = Scope::NewRootScope();
3265 auto input = ops::Placeholder(s.WithOpName("input"), dtype);
3266 auto weights = ops::Placeholder(s.WithOpName("weights"), dtype);
3267 const auto matmul_attrs =
3268 ops::BatchMatMul::AdjX(transpose_a).AdjY(transpose_b);
3269 auto matmul = ops::BatchMatMul(s.WithOpName("my_matmul"), input, weights,
3270 matmul_attrs);
3271 return matmul.operation.node()->def();
3272 };
3273
3274 // We derive test data from the MatMul test params by adding extra leading
3275 // dimensions.
3276 std::vector<MatMulTestParams> params_2d = GetMatMulTestParams();
3277 std::vector<MatMulTestParams> params;
3278 params.reserve(params_2d.size() * 3 + 1);
3279
3280 auto insert_ones = [](std::vector<int> v, int n) {
3281 std::vector<int> ones(n, 1);
3282 ones.insert(ones.end(), v.begin(), v.end());
3283 return ones;
3284 };
3285
3286 // Add a leading 1 dimension to A, B and result.
3287 std::transform(params_2d.begin(), params_2d.end(), std::back_inserter(params),
3288 [](MatMulTestParams p) {
3289 p.shape_a.insert(p.shape_a.begin(), 1);
3290 p.shape_b.insert(p.shape_b.begin(), 1);
3291 p.expected_shape.insert(p.expected_shape.begin(), 1);
3292 return p;
3293 });
3294
3295 // Test with N > 1: weights cannot be batched in implicit batch mode.
3296 // clang-format off
3297 params.push_back(
3298 MatMulTestParams{{2, 2, 2}, {0, 1, 2, 3, 0, 1, 2, 3}, false, // A
3299 {2, 2, 2}, {0, 1, 2, 3, 0, 1, 2, 3}, false, // B
3300 {2, 2, 2}, {2, 3, 6, 11, 2, 3, 6, 11}} // result
3301 );
3302
3303 params.push_back(
3304 MatMulTestParams{{2, 2, 3}, {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5},
3305 false,
3306 {2, 2, 3}, {1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6}, true,
3307 {2, 2, 2}, {8, 17, 26, 62, 8, 17, 26, 62}});
3308 // clang-format on
3309
3310 // Add two leading 1 dimensions to A, B and result.
3311 std::transform(params_2d.begin(), params_2d.end(), std::back_inserter(params),
3312 [insert_ones](MatMulTestParams p) {
3313 p.shape_a = insert_ones(p.shape_a, 2);
3314 p.shape_b = insert_ones(p.shape_b, 2);
3315 p.expected_shape = insert_ones(p.expected_shape, 2);
3316 return p;
3317 });
3318
3319 // Test broadcast: add two leading 1 dimensions to A, but not to B.
3320 std::transform(params_2d.begin(), params_2d.end(), std::back_inserter(params),
3321 [insert_ones](MatMulTestParams p) {
3322 p.shape_a = insert_ones(p.shape_a, 2);
3323 p.expected_shape = insert_ones(p.expected_shape, 2);
3324 return p;
3325 });
3326
3327 // Test broadcast: add a leading 1 dimension to A and two leading 1s to B.
3328 // Broadcasting A need a dynamic brodacast which will be incompatible with
3329 // FC layer.
3330 std::transform(params_2d.begin(), params_2d.end(), std::back_inserter(params),
3331 [insert_ones](MatMulTestParams p) {
3332 p.shape_a = insert_ones(p.shape_a, 1);
3333 p.shape_b = insert_ones(p.shape_b, 2);
3334 p.expected_shape = insert_ones(p.expected_shape, 2);
3335 return p;
3336 });
3337
3338 // Test with N > 1: since weights cannot be batched in implicit batch mode.
3339 // We tests with batch size 2.
3340 std::transform(params_2d.begin(), params_2d.end(), std::back_inserter(params),
3341 [insert_ones](MatMulTestParams p) {
3342 p.shape_a.insert(p.shape_a.begin(), 2);
3343 p.values_a.reserve(p.values_a.size() * 2);
3344 p.values_a.insert(p.values_a.end(), p.values_a.begin(),
3345 p.values_a.end());
3346
3347 p.shape_b.insert(p.shape_b.begin(), 2);
3348 p.values_b.reserve(p.values_b.size() * 2);
3349 p.values_b.insert(p.values_b.end(), p.values_b.begin(),
3350 p.values_b.end());
3351
3352 p.expected_shape.insert(p.expected_shape.begin(), 2);
3353 p.expected_output.reserve(p.expected_output.size() * 2);
3354 p.expected_output.insert(p.expected_output.end(),
3355 p.expected_output.begin(),
3356 p.expected_output.end());
3357 return p;
3358 });
3359
3360 // 4D tensor where the second "batch dim" is not 1
3361 params.push_back(MatMulTestParams{
3362 {1, 2, 4, 5},
3363 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
3364 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
3365 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39},
3366 false, // A
3367 {1, 2, 3, 5},
3368 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3369 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30},
3370 true, // B
3371 {1, 2, 4, 3},
3372 {40, 90, 140, 115, 290, 465, 190, 490,
3373 790, 265, 690, 1115, 1990, 2540, 3090, 2440,
3374 3115, 3790, 2890, 3690, 4490, 3340, 4265, 5190}}); // result
3375
3376 TestMatMulHelper(this, get_batch_matmul_nodedef, params);
3377 }
3378
3379 #if IS_TRT_VERSION_GE(7, 1, 3, 0)
TEST_P(OpConverter_FP32_Test,ConvertEinsum)3380 TEST_P(OpConverter_FP32_Test, ConvertEinsum) {
3381 // Get the NodeDef for Einsum.
3382 auto get_einsum_nodedef = [](DataType dtype, std::string eq,
3383 int n_inputs = 2) -> NodeDef {
3384 Scope s = Scope::NewRootScope();
3385 auto a = ops::Placeholder(s.WithOpName("input_a"), dtype);
3386 std::vector<Input> input_vec{a};
3387 if (n_inputs > 1) {
3388 auto b = ops::Placeholder(s.WithOpName("input_b"), dtype);
3389 input_vec.push_back(b);
3390 }
3391 InputList inputs(input_vec);
3392 auto einsum = ops::Einsum(s.WithOpName("my_einsum"), inputs, eq);
3393 return einsum.operation.node()->def();
3394 };
3395
3396 if (trt_mode_ == TrtTestMode::kImplicitBatch) {
3397 Reset();
3398 NodeDef node_def = get_einsum_nodedef(tf_type_, "ab,cb->ac");
3399 AddTestTensor("input_a", {2, 3});
3400 AddTestTensor("input_b", {2, 3});
3401 TestOpConverter(
3402 "my_einsum", node_def, {2, 2},
3403 errors::Unimplemented("Einsum converter requires dynamic shape mode"),
3404 Status::OK(), ElementsAreArray({13, 16, 40, 52}));
3405 // No further tests.
3406 return;
3407 }
3408
3409 struct TestParams {
3410 std::string equation;
3411 std::vector<int> shape_a;
3412 std::vector<int> values_a;
3413 std::vector<int> shape_b;
3414 std::vector<int> values_b;
3415 std::vector<int> expected_shape;
3416 std::vector<int> expected_output;
3417 Status conv_status;
3418 };
3419
3420 Status unimplemented_eq = errors::Unimplemented("");
3421 Status internal_err = errors::Internal("");
3422 Status internal_err_before_TRT82 =
3423 IS_TRT_VERSION_GE(8, 2, 0, 0) ? Status::OK() : internal_err;
3424 Status unimplemented_before_TRT82 =
3425 IS_TRT_VERSION_GE(8, 2, 0, 0) ? Status::OK() : unimplemented_eq;
3426
3427 Status diagonal_error = unimplemented_eq;
3428 // The old converter only accepts 2 inputs, and the validator returns
3429 // internal_err if only 1 input is used.
3430 Status diagonal_error_1_input =
3431 IS_TRT_VERSION_GE(8, 2, 0, 0) ? unimplemented_eq : internal_err;
3432
3433 std::vector<TestParams> params{
3434 // Dot product.
3435 TestParams{"i,i->", {2}, {2, 3}, {2}, {1, 2}, {}, {8}, unimplemented_eq},
3436 TestParams{"ik,ik->",
3437 {2, 2},
3438 {2, 3, 4, 1},
3439 {2, 2},
3440 {1, 2, 1, 3},
3441 {},
3442 {15},
3443 unimplemented_eq},
3444 // Outer product.
3445 TestParams{"i,k->ik",
3446 {2},
3447 {1, 2},
3448 {3},
3449 {1, 2, 3},
3450 {2, 3},
3451 {1, 2, 3, 2, 4, 6},
3452 unimplemented_eq},
3453 TestParams{"ij,kl->ijkl",
3454 {2, 1},
3455 {1, 2},
3456 {3, 1},
3457 {1, 2, 3},
3458 {2, 1, 3, 1},
3459 {1, 2, 3, 2, 4, 6},
3460 unimplemented_before_TRT82},
3461 // Transpose.
3462 TestParams{"ik->ki",
3463 {2, 3},
3464 {0, 1, 2, 3, 4, 5},
3465 {},
3466 {},
3467 {3, 2},
3468 {0, 3, 1, 4, 2, 5},
3469 internal_err_before_TRT82},
3470 // Diag.
3471 TestParams{"ii->i",
3472 {3, 3},
3473 {0, 1, 2, 3, 4, 5, 6, 7, 8},
3474 {},
3475 {},
3476 {3},
3477 {0, 4, 8},
3478 diagonal_error_1_input},
3479 // Trace.
3480 TestParams{"ii->", // Note TF einsum op always has '->'.
3481 {3, 3},
3482 {0, 1, 2, 3, 4, 5, 6, 7, 8},
3483 {},
3484 {},
3485 {},
3486 {12},
3487 diagonal_error_1_input},
3488 // MatMul with reduction.
3489 TestParams{"abbc,dc->ad",
3490 {1, 2, 2, 3},
3491 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
3492 {2, 3},
3493 {1, 2, 3, 4, 5, 6},
3494 {2, 3},
3495 {1, 2, 3, 2, 4, 6},
3496 diagonal_error},
3497 // Ellipsis with broadcast.
3498 TestParams{"...ik,...jk->...ij",
3499 {1, 3, 1, 4},
3500 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
3501 {2, 1, 1, 4},
3502 {1, 2, 3, 4, 5, 6, 7, 8},
3503 {2, 3, 1, 1},
3504 {20, 60, 100, 44, 148, 252},
3505 unimplemented_eq},
3506 // MatMul.
3507 TestParams{"ab,bc->ac",
3508 {2, 3},
3509 {0, 1, 2, 3, 4, 5},
3510 {3, 2},
3511 {1, 2, 3, 4, 5, 6},
3512 {2, 2},
3513 {13, 16, 40, 52}},
3514 // Batched MatMul.
3515 TestParams{"abc,cde->abde",
3516 /*shape_a=*/{1, 2, 3},
3517 /*values_a=*/{0, 1, 2, 3, 4, 5},
3518 /*shape_b=*/{3, 2, 2},
3519 /*values_v=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
3520 /*expected_shape=*/{1, 2, 2, 2},
3521 /*expected_output=*/{23, 26, 29, 32, 68, 80, 92, 104}},
3522 TestParams{"abcd,cde->abe",
3523 {1, 2, 2, 3},
3524 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
3525 {2, 3, 2},
3526 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
3527 {1, 2, 2},
3528 {125, 140, 341, 392}},
3529 // TF assumes case sensitive labels.
3530 TestParams{"aBAE,AEe->aBe",
3531 {1, 2, 2, 3},
3532 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
3533 {2, 3, 2},
3534 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
3535 {1, 2, 2},
3536 {125, 140, 341, 392}},
3537 TestParams{"abc,cd->abd",
3538 {1, 2, 3},
3539 {0, 1, 2, 3, 4, 5},
3540 {3, 2},
3541 {1, 2, 3, 4, 5, 6},
3542 {1, 2, 2},
3543 {13, 16, 40, 52}},
3544 TestParams{"acbe,aecd->abcd",
3545 {1, 2, 3, 4},
3546 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
3547 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
3548 {1, 4, 2, 3},
3549 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
3550 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
3551 {1, 3, 2, 3},
3552 {90, 96, 102, 732, 786, 840, 250, 272, 294, 940, 1010, 1080,
3553 410, 448, 486, 1148, 1234, 1320}},
3554 TestParams{"aecd,abcd->acbe",
3555 {1, 2, 3, 4},
3556 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
3557 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
3558 {1, 2, 3, 4},
3559 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
3560 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
3561 {1, 3, 2, 2},
3562 {20, 140, 92, 788, 148, 460, 412, 1300, 404, 908, 860, 1940}},
3563 TestParams{"acd,dce->ae",
3564 {1, 2, 3},
3565 {0, 1, 2, 3, 4, 5},
3566 {3, 2, 2},
3567 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
3568 {1, 2},
3569 {115, 130}},
3570 TestParams{"abcd,bace->bade",
3571 {2, 3, 2, 1},
3572 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
3573 {3, 2, 2, 1},
3574 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
3575 {3, 2, 1, 1},
3576 {2, 46, 28, 128, 86, 242}},
3577 TestParams{
3578 "cebfad,fageb->abcdg",
3579 {1, 1, 3, 3, 2, 2},
3580 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
3581 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
3582 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35},
3583 {3, 2, 2, 1, 3},
3584 {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
3585 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
3586 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36},
3587 {2, 3, 1, 2, 2},
3588 {252, 288, 291, 336, 768, 912, 810, 963, 1356, 1608, 1401, 1662,
3589 438, 492, 495, 558, 1176, 1338, 1236, 1407, 1986, 2256, 2049, 2328}},
3590 };
3591
3592 for (auto p : params) {
3593 for (bool a_is_tensor : {true, false}) {
3594 for (bool b_is_tensor : {true, false}) {
3595 if (!a_is_tensor && !b_is_tensor) {
3596 // Skip test when both args are weights. We do not convert this
3597 // since const folding eliminates this case.
3598 continue;
3599 }
3600 Reset();
3601 int n_inputs = p.shape_b.empty() ? 1 : 2;
3602 NodeDef node_def = get_einsum_nodedef(tf_type_, p.equation, n_inputs);
3603 if (a_is_tensor) {
3604 AddTestTensor("input_a", p.shape_a, p.values_a);
3605 } else {
3606 AddTestWeights("input_a", p.shape_a, p.values_a, tf_type_);
3607 }
3608 if (!p.shape_b.empty()) {
3609 if (b_is_tensor) {
3610 AddTestTensor("input_b", p.shape_b, p.values_b);
3611 } else {
3612 AddTestWeights("input_b", p.shape_b, p.values_b, tf_type_);
3613 }
3614 }
3615 TestOpConverter("my_einsum", node_def, p.expected_shape, p.conv_status,
3616 Status::OK(), ElementsAreArray(p.expected_output));
3617 }
3618 }
3619 }
3620 }
3621 #endif // IS_TRT_VERSION_GE(7, 1, 3, 0)
3622
TEST_P(OpConverter_FP32_FP16_Test,ConvertBiasAdd)3623 TEST_P(OpConverter_FP32_FP16_Test, ConvertBiasAdd) {
3624 // Note that kINT32 is not supported by IScaleLayer, so we don't test
3625 // DT_INT32 type here. DT_FLOAT and DT_HALF are tested.
3626 // Get the NodeDef for BiasAdd.
3627 auto get_biasadd_nodedef = [](const string& data_format,
3628 DataType tf_type) -> NodeDef {
3629 Scope s = Scope::NewRootScope();
3630 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
3631 auto weights = ops::Placeholder(s.WithOpName("weights"), tf_type);
3632 const auto biasadd_attrs = ops::BiasAdd::DataFormat(data_format);
3633 auto biasadd =
3634 ops::BiasAdd(s.WithOpName("my_biasadd"), input, weights, biasadd_attrs);
3635 return biasadd.operation.node()->def();
3636 };
3637
3638 for (const string& data_format : {"NHWC", "NCHW"}) {
3639 for (const int trt_input_rank : {1, 2, 3, 4}) {
3640 Reset();
3641 NodeDef node_def = get_biasadd_nodedef(data_format, tf_type_);
3642
3643 // Add input, dims_array will be like {2, 1, ..., 1, 3}
3644 std::vector<int32> dims_array(trt_input_rank + 1, 1);
3645 if (trt_input_rank == 1) {
3646 dims_array[1] = (data_format == "NHWC" ? 3 : 2);
3647 } else {
3648 dims_array[1] = 2;
3649 dims_array[trt_input_rank] = 3;
3650 }
3651 const int64_t num_input = DimsAdapter(dims_array).Volume();
3652 ASSERT_EQ(trt_input_rank > 1 ? 6 : (data_format == "NHWC" ? 3 : 2),
3653 num_input);
3654 std::vector<float> input_data(num_input, 0);
3655
3656 AddTestTensor("input", dims_array, input_data);
3657
3658 const int channel_size = (data_format == "NHWC" ? 3 : 2);
3659 std::vector<float> bias(channel_size);
3660 for (int i = 0; i < channel_size; ++i) {
3661 bias[i] = i + 1; // bias will be {1, 2, 3, ...}
3662 }
3663 AddTestWeights("weights", {channel_size}, bias, tf_type_);
3664
3665 // Build and run the engine.
3666 std::vector<float> output_data;
3667
3668 if (trt_input_rank == 1) {
3669 if (data_format == "NHWC") {
3670 output_data = {1, 2, 3};
3671 } else {
3672 output_data = {1, 2};
3673 }
3674 } else {
3675 if (data_format == "NHWC") {
3676 output_data = {1, 2, 3, 1, 2, 3};
3677 } else {
3678 output_data = {1, 1, 1, 2, 2, 2};
3679 }
3680 }
3681 TestOpConverter("my_biasadd", node_def, dims_array, Status::OK(),
3682 Status::OK(), ElementsAreArray(output_data));
3683 }
3684 }
3685 }
3686
3687 template <typename OpType>
GetBinaryOpNodeDef(DataType dtype)3688 NodeDef GetBinaryOpNodeDef(DataType dtype) {
3689 Scope s = Scope::NewRootScope();
3690 auto input_l = ops::Placeholder(s.WithOpName("input1"), dtype);
3691 auto input_r = ops::Placeholder(s.WithOpName("input2"), dtype);
3692 auto op = OpType(s.WithOpName("my_binary"), input_l, input_r);
3693 return op.operation.node()->def();
3694 }
3695
TEST_P(OpConverter_FP32_FP16_BinaryTest,ConvertBinary)3696 TEST_P(OpConverter_FP32_FP16_BinaryTest, ConvertBinary) {
3697 using OpFunc = std::function<NodeDef(DataType)>;
3698 std::map<std::string, std::pair<OpFunc, std::vector<float>>> op_test_info;
3699 #define ADD_OP(name, op, v1, v2, v3, v4, v5, v6, v7, v8) \
3700 op_test_info[name] = \
3701 std::make_pair(GetBinaryOpNodeDef<op>, \
3702 std::vector<float>(v1, v2, v3, v4, v5, v6, v7, v8))
3703 ADD_OP("Add", ops::Add, {5, 8, 6, 9, 5, 8, 6, 9});
3704 ADD_OP("AddV2", ops::AddV2, {5, 8, 6, 9, 5, 8, 6, 9});
3705 ADD_OP("Sub", ops::Sub, {1, 4, 0, 3, 1, 4, 0, 3});
3706 ADD_OP("Mul", ops::Mul, {6, 12, 9, 18, 6, 12, 9, 18});
3707 ADD_OP("Div", ops::Div, {1.5, 3, 1, 2, 1.5, 3, 1, 2});
3708 ADD_OP("RealDiv", ops::RealDiv, {1.5, 3, 1, 2, 1.5, 3, 1, 2});
3709 ADD_OP("FloorDiv", ops::FloorDiv, {1, 3, 1, 2, 1, 3, 1, 2});
3710 ADD_OP("Minimum", ops::Minimum, {2, 2, 3, 3, 2, 2, 3, 3});
3711 ADD_OP("Maximum", ops::Maximum, {3, 6, 3, 6, 3, 6, 3, 6});
3712 ADD_OP("Pow", ops::Pow, {9, 36, 27, 216, 9, 36, 27, 216});
3713 #if IS_TRT_VERSION_GE(8, 2, 0, 0)
3714 ADD_OP("Greater", ops::Greater, {1, 1, 0, 1, 1, 1, 0, 1});
3715 ADD_OP("Less", ops::Less, {0, 0, 0, 0, 0, 0, 0, 0});
3716 ADD_OP("Equal", ops::Equal, {0, 0, 1, 0, 0, 0, 1, 0});
3717 ADD_OP("GreaterEqual", ops::Less, {1, 1, 1, 1, 1, 1, 1, 1});
3718 ADD_OP("LessEqual", ops::Greater, {0, 0, 1, 0, 0, 0, 1, 0});
3719 #endif
3720 #undef ADD_OP
3721 std::vector<std::vector<float>> data = {
3722 {3, 6, 3, 6}, {3, 6}, {2, 3, 2, 3}, {2, 3}};
3723 RunTests(*BinaryOperationMap(), op_test_info, data);
3724 }
3725
TEST_P(OpConverter_BOOL_BinaryTest,ConvertBooleanBinary)3726 TEST_P(OpConverter_BOOL_BinaryTest, ConvertBooleanBinary) {
3727 using OpFunc = std::function<NodeDef(DataType)>;
3728 std::map<std::string, std::pair<OpFunc, std::vector<int>>> op_test_info;
3729 #define ADD_OP(name, op, v1, v2, v3, v4, v5, v6, v7, v8) \
3730 op_test_info[name] = \
3731 std::make_pair(GetBinaryOpNodeDef<op>, \
3732 std::vector<int>(v1, v2, v3, v4, v5, v6, v7, v8))
3733 ADD_OP("LogicalOr", ops::LogicalOr, {1, 1, 0, 1, 1, 1, 0, 1});
3734 ADD_OP("LogicalAnd", ops::LogicalAnd, {0, 1, 0, 0, 0, 1, 0, 0});
3735 #undef ADD_OP
3736 #if IS_TRT_VERSION_GE(8, 2, 0, 0)
3737 std::vector<std::vector<int>> data = {
3738 {0, 1, 0, 1}, {0, 1}, {1, 0, 1, 0}, {1, 0}};
3739 RunTests(*BinaryBooleanOperationMap(), op_test_info, data);
3740 #endif
3741 }
3742
GetAddNNodeDef(const std::vector<string> & input_names,DataType dtype)3743 NodeDef GetAddNNodeDef(const std::vector<string>& input_names, DataType dtype) {
3744 Scope s = Scope::NewRootScope();
3745 OutputList inputs;
3746 for (const string& name : input_names) {
3747 inputs.push_back(ops::Placeholder(s.WithOpName(name), dtype));
3748 }
3749 auto op = ops::AddN(s.WithOpName("my_addn"), inputs);
3750 return op.operation.node()->def();
3751 }
3752
3753 struct AddNTestParams {
3754 std::vector<float> input_values;
3755 std::vector<string> input_names;
3756 std::vector<int> dimensions;
3757 std::vector<float> expected_output;
3758 Status status;
3759 };
3760
TestAddN(ParameterizedOpConverterTestBase * test,AddNTestParams & p)3761 void TestAddN(ParameterizedOpConverterTestBase* test, AddNTestParams& p) {
3762 // All inputs are tensors.
3763 test->Reset();
3764 const NodeDef node_def = GetAddNNodeDef(p.input_names, test->get_tf_type());
3765
3766 if (p.input_values.size() % p.input_names.size() != 0) {
3767 LOG(ERROR) << "The number of input values: `" << p.input_values.size()
3768 << "` is not a multiple of the number of inputs: `"
3769 << p.input_names.size() << "`";
3770 ASSERT_TRUE(false);
3771 }
3772
3773 DataVec input_data;
3774 int input_offset = 0;
3775 const int window_size = p.input_values.size() / p.input_names.size();
3776 for (const string& name : p.input_names) {
3777 std::vector<float>::const_iterator start_pos =
3778 p.input_values.begin() + input_offset;
3779 std::vector<float>::const_iterator end_pos = start_pos + window_size;
3780 std::vector<float> sub_input_val(start_pos, end_pos);
3781 input_offset += window_size;
3782
3783 test->AddTestTensor(name, p.dimensions, test->get_tf_type(), sub_input_val);
3784 }
3785
3786 test->TestOpConverter("my_addn", node_def, p.dimensions,
3787 /*expected_conversion_status=*/p.status,
3788 /*expected_runtime_status=*/p.status,
3789 /*matcher=*/ElementsAreArray(p.expected_output),
3790 /*out_tf_types=*/{test->get_tf_type()});
3791 }
3792
TEST_P(OpConverter_FP32_FP16_Test,ConvertAddN)3793 TEST_P(OpConverter_FP32_FP16_Test, ConvertAddN) {
3794 {
3795 // Weights with batch dim that is not 1.
3796 Reset();
3797 const NodeDef node_def = GetAddNNodeDef({"tensor", "weights"}, tf_type_);
3798 AddTestTensor("tensor", /*dims=*/{1, 2});
3799 AddTestWeights<float>("weights", {2, 1, 2}, {0, 1, 2, 3});
3800 RunValidationAndConversion(
3801 node_def, error::INVALID_ARGUMENT,
3802 "Weights input to AddN is required to have batch dimension 1.");
3803 }
3804
3805 const std::vector<float> common_input = CreateVectorIota<float>(6);
3806
3807 std::vector<AddNTestParams> params = {
3808 {/*input_values=*/common_input,
3809 /*input_names=*/{"inp1", "inp2", "inp3"},
3810 /*dimensions=*/{1, 1, 2, 1, 1},
3811 /*expected_output=*/{6, 9},
3812 /*status=*/Status::OK()},
3813 {/*input_values=*/common_input,
3814 /*input_names=*/{"inp1", "inp2"},
3815 /*dimensions=*/{1, 1, 3, 1, 1},
3816 /*expected_output=*/{3, 5, 7},
3817 /*status=*/Status::OK()},
3818 {/*input_values=*/common_input,
3819 /*input_names=*/{"inp1", "inp2", "inp3"},
3820 /*dimensions=*/{1, 2, 1, 1},
3821 /*expected_output=*/{6, 9},
3822 /*status=*/Status::OK()},
3823 {/*input_values=*/common_input,
3824 /*input_names=*/{"inp1", "inp2"},
3825 /*dimensions=*/{1, 1, 3, 1},
3826 /*expected_output=*/{3, 5, 7},
3827 /*status=*/Status::OK()},
3828 {/*input_values=*/common_input,
3829 /*input_names=*/{"inp1", "inp2", "inp3"},
3830 /*dimensions=*/{1, 2, 1},
3831 /*expected_output=*/{6, 9},
3832 /*status=*/Status::OK()},
3833 {/*input_values=*/common_input,
3834 /*input_names=*/{"inp1", "inp2"},
3835 /*dimensions=*/{1, 1, 3},
3836 /*expected_output=*/{3, 5, 7},
3837 /*status=*/Status::OK()},
3838 {/*input_value=*/common_input,
3839 /*input_names=*/{"inp1", "inp2", "inp3"},
3840 /*dimensions=*/{2, 1},
3841 /*expected_output=*/{6, 9},
3842 /*status=*/Status::OK()},
3843 {/*input_values=*/common_input,
3844 /*input_names=*/{"inp1", "inp2"},
3845 /*dimensions=*/{1, 3},
3846 /*expected_output=*/{3, 5, 7},
3847 /*status=*/Status::OK()},
3848 {/*input_values=*/common_input,
3849 /*input_names=*/{"inp1", "inp2", "inp3"},
3850 /*dimensions=*/{2},
3851 /*expected_output=*/{6, 9},
3852 /*status=*/Status::OK()},
3853 {/*input_values=*/common_input,
3854 /*input_names=*/{"inp1", "inp2"},
3855 /*dimensions=*/{3},
3856 /*expected_output=*/{3, 5, 7},
3857 /*status=*/Status::OK()},
3858 {/*input_values=*/common_input,
3859 /*input_names=*/{"inp1", "inp2", "inp3", "inp4", "inp5", "inp6"},
3860 /*dimensions=*/{1},
3861 /*expected_output=*/{15},
3862 /*status=*/Status::OK()},
3863 };
3864
3865 for (auto p : params) {
3866 TestAddN(this, p);
3867 }
3868 }
3869
TEST_P(OpConverter_FP32_Test,ConvertQDQDynamicRangeMode)3870 TEST_P(OpConverter_FP32_Test, ConvertQDQDynamicRangeMode) {
3871 {
3872 // FakeQuantWithMinMaxArgs attributes are empty, should fail.
3873 Reset(TrtPrecisionMode::INT8);
3874 NodeDef node_def =
3875 MakeNodeDef("my_quantize", "FakeQuantWithMinMaxArgs", {"input"});
3876 AddTestTensor("input", {1, 2, 3});
3877 RunValidationAndConversion(node_def, error::NOT_FOUND,
3878 "No attr named 'min'");
3879 }
3880 {
3881 // FakeQuantWithMinMaxArgs ranges set via attributes, ok.
3882 Reset(TrtPrecisionMode::INT8);
3883 Scope s = Scope::NewRootScope();
3884 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3885 auto quantize_attrs = ops::FakeQuantWithMinMaxArgs::Min(-6.0f).Max(6.0f);
3886 auto quantize = ops::FakeQuantWithMinMaxArgs(s.WithOpName("my_quantize"),
3887 input, quantize_attrs);
3888 const NodeDef& node_def = quantize.operation.node()->def();
3889 AddTestTensor("input", {1, 2, 3});
3890 RunValidationAndConversion(node_def);
3891 TRT_TensorOrWeights output;
3892 TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
3893 ASSERT_TRUE(output.is_tensor());
3894 auto ranges = quantization_ranges();
3895 EXPECT_EQ(1, ranges.count(output.tensor()->trt_tensor()));
3896 EXPECT_EQ(6.0f, ranges[output.tensor()->trt_tensor()]);
3897 }
3898 {
3899 // FakeQuantWithMinMaxVars ranges set via inputs, ok.
3900 Reset(TrtPrecisionMode::INT8);
3901 Scope s = Scope::NewRootScope();
3902 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3903 auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
3904 auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
3905 auto quantize = ops::FakeQuantWithMinMaxVars(
3906 s.WithOpName("my_quantize"), input, weights_min, weights_max);
3907 const NodeDef& node_def = quantize.operation.node()->def();
3908 AddTestTensor("input", {1, 2, 3});
3909 AddTestWeights<float>("weights_min", {1}, {-6.0f});
3910 AddTestWeights<float>("weights_max", {1}, {6.0f});
3911 RunValidationAndConversion(node_def);
3912 TRT_TensorOrWeights output;
3913 TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
3914 ASSERT_TRUE(output.is_tensor());
3915 auto ranges = quantization_ranges();
3916 EXPECT_EQ(1, ranges.count(output.tensor()->trt_tensor()));
3917 EXPECT_EQ(6.0f, ranges[output.tensor()->trt_tensor()]);
3918 }
3919 {
3920 // QuantizeAndDequantizeV2 ranges set via inputs, ok.
3921 Reset(TrtPrecisionMode::INT8);
3922 Scope s = Scope::NewRootScope();
3923 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3924 auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
3925 auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
3926 auto quantize = ops::QuantizeAndDequantizeV2(
3927 s.WithOpName("my_quantize"), input, weights_min, weights_max);
3928 const NodeDef& node_def = quantize.operation.node()->def();
3929 AddTestTensor("input", {1, 2, 3});
3930 AddTestWeights<float>("weights_min", {1}, {-6.0f});
3931 AddTestWeights<float>("weights_max", {1}, {6.0f});
3932 RunValidationAndConversion(node_def);
3933 TRT_TensorOrWeights output;
3934 TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
3935 ASSERT_TRUE(output.is_tensor());
3936 auto ranges = quantization_ranges();
3937 EXPECT_EQ(1, ranges.count(output.tensor()->trt_tensor()));
3938 EXPECT_EQ(6.0f, ranges[output.tensor()->trt_tensor()]);
3939 }
3940 {
3941 // QuantizeAndDequantizeV2 Range inputs are tensors, should fail.
3942 Reset(TrtPrecisionMode::INT8);
3943 Scope s = Scope::NewRootScope();
3944 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3945 auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
3946 auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
3947 auto quantize = ops::QuantizeAndDequantizeV2(
3948 s.WithOpName("my_quantize"), input, weights_min, weights_max);
3949 const NodeDef& node_def = quantize.operation.node()->def();
3950 AddTestTensor("input", {1, 2, 3});
3951 AddTestTensor("weights_min", {1});
3952 AddTestTensor("weights_max", {1});
3953 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3954 "The input \"input_min\" for "
3955 "QuantizeAndDequantizeV2 must be a constant");
3956 }
3957 {
3958 // QuantizeAndDequantizeV3 ranges set via inputs, ok.
3959 Reset(TrtPrecisionMode::INT8);
3960 Scope s = Scope::NewRootScope();
3961 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
3962 auto weights_min = ops::Placeholder(s.WithOpName("weights_min"), DT_FLOAT);
3963 auto weights_max = ops::Placeholder(s.WithOpName("weights_max"), DT_FLOAT);
3964 auto num_bits = ops::Placeholder(s.WithOpName("num_bits"), DT_INT32);
3965 auto quantize = ops::QuantizeAndDequantizeV3(
3966 s.WithOpName("my_quantize"), input, weights_min, weights_max, num_bits);
3967 const NodeDef& node_def = quantize.operation.node()->def();
3968 AddTestTensor("input", {1, 2, 3});
3969 AddTestWeights<float>("weights_min", {1}, {-6.0f});
3970 AddTestWeights<float>("weights_max", {1}, {6.0f});
3971 AddTestWeights<int>("num_bits", {1}, {8});
3972 RunValidationAndConversion(node_def);
3973 TRT_TensorOrWeights output;
3974 TF_EXPECT_OK(GetTensorOrWeights("my_quantize", &output));
3975 ASSERT_TRUE(output.is_tensor());
3976 auto ranges = quantization_ranges();
3977 EXPECT_EQ(1, ranges.count(output.tensor()->trt_tensor()));
3978 EXPECT_EQ(6.0f, ranges[output.tensor()->trt_tensor()]);
3979 }
3980 }
3981
TEST_P(OpConverter_FP32_FP16_Test,ConvertSquare)3982 TEST_P(OpConverter_FP32_FP16_Test, ConvertSquare) {
3983 {
3984 // Input is weights, should fail.
3985 Reset();
3986 Scope s = Scope::NewRootScope();
3987 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
3988 auto square = ops::Square(s.WithOpName("my_square"), input);
3989 NodeDef node_def = square.operation.node()->def();
3990 AddTestWeights("input", {1, 2, 3}, {1, 2, 3, 4, -5, 6}, tf_type_);
3991 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
3992 "The input \"x\" for Square must be a tensor");
3993 }
3994
3995 Reset();
3996
3997 Scope s = Scope::NewRootScope();
3998 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
3999 auto square = ops::Square(s.WithOpName("my_square"), input);
4000 NodeDef node_def = square.operation.node()->def();
4001
4002 const int num_inputs = 20;
4003 std::vector<float> inputs(num_inputs);
4004 std::vector<float> expected_outputs(num_inputs);
4005
4006 for (int i = 0; i < num_inputs; ++i) {
4007 const float value = (i - 9);
4008 inputs[i] = value;
4009 expected_outputs[i] = value * value;
4010 }
4011 AddTestTensor("input", {1, 1, 20}, tf_type_, inputs);
4012
4013 TestOpConverter("my_square", node_def, {1, 1, 20}, Status::OK(), Status::OK(),
4014 ArrayFloatNear(expected_outputs, 0));
4015 }
4016
4017 #if IS_TRT_VERSION_GE(8, 2, 0, 0)
4018
TEST_P(OpConverter_FP32_FP16_INT32_Test,ConvertFill)4019 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertFill) {
4020 Scope s = Scope::NewRootScope();
4021 auto dims = ops::Placeholder(s.WithOpName("dims"), DT_INT32);
4022 auto value = ops::Placeholder(s.WithOpName("value"), tf_type_);
4023 auto fill = ops::Fill(s.WithOpName("my_fill"), dims, value);
4024 const NodeDef& node_def = fill.operation.node()->def();
4025
4026 if (trt_mode_ == TrtTestMode::kImplicitBatch) {
4027 Reset();
4028 // random data
4029 AddTestWeights("dims", {2}, {2, 2}, DT_INT32);
4030 AddTestWeights("value", {1}, {42.0}, tf_type_);
4031 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4032 "Conversion for Fill is not implemented in "
4033 "implicit batch mode");
4034 return;
4035 }
4036
4037 std::vector<std::vector<int>> output_dims_params = {
4038 {8}, {8, 2, 4}, {32, 32, 3200}};
4039 std::vector<std::vector<int>> value_dims_params = {{}, {1}};
4040
4041 float val = 42.0;
4042 Status status = Status::OK();
4043 for (bool dims_is_tensor : {true, false}) {
4044 for (bool value_is_tensor : {true, false}) {
4045 for (auto output_dims : output_dims_params) {
4046 for (auto value_dims : value_dims_params) {
4047 Reset();
4048 std::vector<int32> dims_dims = {output_dims.size()};
4049 if (dims_is_tensor) {
4050 AddTestTensor("dims", dims_dims, DT_INT32, output_dims, dims_dims);
4051 } else {
4052 AddTestWeights("dims", dims_dims, output_dims, DT_INT32);
4053 }
4054 if (value_is_tensor) {
4055 AddTestTensor("value", value_dims, tf_type_, {val});
4056 } else {
4057 AddTestWeights("value", value_dims, {val}, tf_type_);
4058 }
4059 size_t nb_el = 1;
4060 for (auto d : output_dims) {
4061 nb_el *= d;
4062 }
4063 std::vector<float> expected_output(nb_el, val);
4064 TestOpConverter("my_fill", node_def, output_dims, status, status,
4065 ElementsAreArray(expected_output));
4066 }
4067 }
4068 }
4069 }
4070 }
4071
TEST_P(OpConverter_FP32_FP16_INT32_Test,ConvertRange)4072 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertRange) {
4073 auto get_casted_value = [this](const float value, const DataType dtype) {
4074 return dtype == DT_INT32 ? static_cast<int32>(value) : value;
4075 };
4076
4077 // A function that builds the next lexicographically greater configuration
4078 // for the current one. The configuration is described as a (0,1)-vector
4079 // config, where config[i] is 0 or 1 when the i-th parameter is passed as
4080 // a weight or tensor, respectively. The function returns TRUE if such
4081 // a configuration is built, or FALSE otherwise.
4082 auto nextTensorWeigtConfiguration = [this](std::vector<int>& config) {
4083 for (int i = config.size(); i-- > 0;) {
4084 if (config[i] = 1 - config[i]) return true;
4085 }
4086 return false;
4087 };
4088
4089 auto set_parameters = [this](const std::array<const char*, 3>& name,
4090 const std::array<std::vector<float>, 3>& value,
4091 const std::array<DataType, 3>& type,
4092 const std::vector<int>& config,
4093 int shape_idx = -1) {
4094 Reset();
4095 for (int i = 0; i < 3; i++) {
4096 if (config[i]) {
4097 std::vector<int32> partial_shape_dims = {};
4098 // The correct partial shape will be provided
4099 // (a) for all parameters, when shape_idx > 3
4100 // (b) for all parameters, except shape_idx, when shape_idx >= 0
4101 // (c) for none of the shape_idx < 0
4102 if (shape_idx > 3 || shape_idx >= 0 && shape_idx != i) {
4103 partial_shape_dims = {1};
4104 }
4105 AddTestTensor(name[i], {1}, type[i], value[i], partial_shape_dims);
4106 } else {
4107 AddTestWeights(name[i], {1}, value[i], type[i]);
4108 }
4109 }
4110 };
4111
4112 const float start = 1.0;
4113 const float limit = 43.0;
4114 const float delta = 2.0;
4115
4116 const std::array<const char*, 3> param_name = {"start", "limit", "delta"};
4117 std::array<std::vector<float>, 3> param_value;
4118 param_value[0] = {start};
4119 param_value[1] = {limit};
4120 param_value[2] = {delta};
4121 const auto start_type = tf_type_;
4122 std::array<DataType, 3> param_type = {tf_type_, tf_type_, tf_type_};
4123
4124 Scope s = Scope::NewRootScope();
4125 const auto range =
4126 ops::Range(s.WithOpName("my_range"),
4127 ops::Placeholder(s.WithOpName(param_name[0]), param_type[0]),
4128 ops::Placeholder(s.WithOpName(param_name[1]), param_type[1]),
4129 ops::Placeholder(s.WithOpName(param_name[2]), param_type[2]));
4130
4131 const NodeDef& ndef = range.operation.node()->def();
4132 const std::vector<DataType> param_types{DT_FLOAT, DT_HALF, DT_INT32};
4133
4134 // ConverterRange is not implemented for Implicite batch mode.
4135 std::vector<int> config(3, 0);
4136 if (trt_mode_ == TrtTestMode::kImplicitBatch) {
4137 do {
4138 set_parameters(param_name, param_value, param_type, config);
4139 RunValidationAndConversion(ndef, error::UNIMPLEMENTED,
4140 "Conversion for Range is not implemented in "
4141 "implicit batch mode");
4142 } while (nextTensorWeigtConfiguration(config));
4143
4144 return;
4145 }
4146
4147 const std::string expect_msg = convert_range_expected_msg(ndef);
4148 bool all_weights = true;
4149 do {
4150 for (auto limit_type : param_types) {
4151 param_type[1] = limit_type;
4152 for (auto delta_type : param_types) {
4153 param_type[2] = delta_type;
4154
4155 const auto all_integers = start_type == DT_INT32 &&
4156 limit_type == DT_INT32 &&
4157 delta_type == DT_INT32;
4158
4159 if (all_weights || all_integers && !config[2]) {
4160 // Reject invalid parameters if delta = 0 and it's passed as a weight.
4161 param_value[2] = {0};
4162 set_parameters(param_name, param_value, param_type, config);
4163 RunValidationAndConversion(
4164 ndef, error::INVALID_ARGUMENT,
4165 "The delta parameter of Range operation cannot be equal to 0");
4166
4167 if (!all_weights && !config[2]) {
4168 param_value[2] = {-1};
4169 set_parameters(param_name, param_value, param_type, config);
4170 const string err = StrCat(
4171 "The delta parameter of Range operation "
4172 "cannot be negative, when one of (start, limit) is passed as "
4173 "a tensor, but got ",
4174 param_value[2][0]);
4175 RunValidationAndConversion(ndef, error::INVALID_ARGUMENT, err);
4176 }
4177 }
4178
4179 if (all_weights) {
4180 // Reject invalid parameters preventing the limit from
4181 // being reached for fixed values of start and delta.
4182 for (int j = 0; j <= 1; j++) {
4183 param_value[j] = {get_casted_value(start, tf_type_)};
4184 param_value[1 - j] = {get_casted_value(limit, limit_type)};
4185 param_value[2] = {(2 * j - 1) *
4186 get_casted_value(delta, delta_type)};
4187 set_parameters(param_name, param_value, param_type, config);
4188 const auto error = convert_range_error_msg(
4189 param_value[0][0], param_value[1][0], param_value[2][0]);
4190 RunValidationAndConversion(ndef, error::INVALID_ARGUMENT, error);
4191 }
4192 }
4193
4194 param_value[0] = {start};
4195 param_value[2] = {delta};
4196 if (all_integers) {
4197 if (trt_mode_ == TrtTestMode::kDynamicShape) {
4198 // Wrong dimension for the parameter passed as a tensor.
4199 for (int j = 0; j < 3; j++) {
4200 if (!config[j]) continue;
4201
4202 const string err =
4203 StrCat("Dimension for '", param_name[j],
4204 "' of Range operator should be equal to 1");
4205 set_parameters(param_name, param_value, param_type, config, j);
4206 RunValidationAndConversion(ndef, error::INVALID_ARGUMENT, err);
4207 }
4208 }
4209 } else {
4210 if (!all_weights) {
4211 // The following test should fail, when
4212 // (a) at least one parameter is passed as a tensor;
4213 // (b) at least one parameter is not of type DT_INT32.
4214 set_parameters(param_name, param_value, param_type, config);
4215 RunValidationAndConversion(ndef, error::UNIMPLEMENTED, expect_msg);
4216 }
4217 }
4218 }
4219 }
4220 // All other configs will be set so that at least one parameter
4221 // will be passed as a tensor
4222 all_weights = false;
4223 } while (nextTensorWeigtConfiguration(config));
4224
4225 nvinfer1::DataType trt_type;
4226 TF_ASSERT_OK(TfTypeToTrtType(DT_BOOL, &trt_type));
4227 const std::string error_msg =
4228 "Unsupported data type " + DebugString(trt_type) + " used for '";
4229 do {
4230 for (auto limit_type : param_types) {
4231 param_type[1] = limit_type;
4232 for (auto delta_type : param_types) {
4233 param_type[2] = delta_type;
4234
4235 for (int i = 0; i < 3; i++) {
4236 if (!config[i]) {
4237 const auto saved_type = param_type[i];
4238 param_type[i] = DT_BOOL;
4239 set_parameters(param_name, param_value, param_type, config);
4240 param_type[i] = saved_type;
4241 RunValidationAndConversion(ndef, error::INVALID_ARGUMENT,
4242 error_msg + param_name[i] + "'");
4243 }
4244 }
4245 }
4246 }
4247 } while (nextTensorWeigtConfiguration(config));
4248
4249 // The tests that pass all checks in ConvertRange::Validate().
4250 const Status status = Status::OK();
4251 const std::vector<DataType> int_type{DT_INT32};
4252 int partial_shape_idx = -1;
4253 all_weights = true;
4254 do {
4255 // For now when at least one of (start, limit, delta) is passed as a tensor
4256 // (a) all these parameters should be of DT_INT32 type;
4257 // (b) only positive delta could be used.
4258 const auto& types = all_weights ? param_types : int_type;
4259 const auto jEnd = all_weights ? 1 : 0;
4260 for (auto limit_type : types) {
4261 param_type[1] = limit_type;
4262 for (auto delta_type : types) {
4263 param_type[2] = delta_type;
4264 // Loop for positive and negative deltas.
4265 for (int j = 0; j <= jEnd; j++) {
4266 // Define the expected result which should match the usage
4267 // of DT_INT32 for one of (start, limit, delta).
4268 const int mult = (1 - 2 * j);
4269 param_value[j] = {get_casted_value(start, tf_type_)};
4270 param_value[1 - j] = {get_casted_value(limit, limit_type)};
4271 param_value[2] = {mult * get_casted_value(delta, delta_type)};
4272
4273 // Create expected output.
4274 std::vector<float> expected_output;
4275 const float limit_curr = param_value[1][0];
4276 const float delta_curr = param_value[2][0];
4277 float value = param_value[0][0];
4278 int num_values = 0;
4279 while (mult * (limit_curr - value) > 0) {
4280 num_values++;
4281 expected_output.push_back(value);
4282 value += delta_curr;
4283 }
4284
4285 set_parameters(param_name, param_value, param_type, config,
4286 partial_shape_idx);
4287 const std::vector<int> output_dims = {num_values};
4288 TestOpConverter("my_range", ndef, output_dims, status, status,
4289 ElementsAreArray(expected_output));
4290 }
4291 }
4292 }
4293
4294 if (all_weights) {
4295 if (start_type != DT_INT32) break;
4296 if (trt_mode_ == TrtTestMode::kDynamicShape) partial_shape_idx = 3;
4297
4298 // All other configs will be set so that at least one parameter
4299 // will be passed as a tensor
4300 all_weights = false;
4301 }
4302 } while (nextTensorWeigtConfiguration(config));
4303 }
4304
TEST_P(OpConverter_FP32_FP16_INT32_Test,ConvertLikeOps)4305 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertLikeOps) {
4306 auto get_node = [&](int value) -> NodeDef {
4307 Scope s = Scope::NewRootScope();
4308 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
4309 if (value == 0) {
4310 auto zeros_like = ops::ZerosLike(s.WithOpName("Zeros"), input);
4311 return zeros_like.operation.node()->def();
4312 }
4313 auto ones_like = ops::OnesLike(s.WithOpName("Ones"), input);
4314 return ones_like.operation.node()->def();
4315 };
4316
4317 for (int value : {0, 1}) {
4318 Reset();
4319 const NodeDef& node_def = get_node(value);
4320 const std::string name = value ? "Ones" : "Zeros";
4321
4322 if (trt_mode_ == TrtTestMode::kImplicitBatch) {
4323 std::vector<float> input_data(8, 42.0f);
4324 AddTestTensor("input", {8}, tf_type_, input_data);
4325 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4326 "Conversion for " + name + "Like is not " +
4327 "implemented in implicit batch mode");
4328 continue;
4329 }
4330
4331 std::vector<std::vector<int>> output_dims_params = {
4332 {8}, {8, 2, 4}, {32, 32, 3200}};
4333
4334 float val = 42.0;
4335 Status status = Status::OK();
4336 for (bool input_is_tensor : {true, false}) {
4337 for (auto output_dims : output_dims_params) {
4338 Reset();
4339 size_t nb_el = 1;
4340 for (auto d : output_dims) {
4341 nb_el *= d;
4342 }
4343 std::vector<float> input_data(nb_el, val);
4344 if (input_is_tensor) {
4345 AddTestTensor("input", output_dims, tf_type_, input_data);
4346 } else {
4347 AddTestWeights("input", output_dims, input_data, tf_type_);
4348 }
4349 std::vector<float> expected_output(nb_el, value);
4350 TestOpConverter(name, node_def, output_dims, status, status,
4351 ElementsAreArray(expected_output));
4352 }
4353 }
4354 }
4355 }
4356
4357 #endif // IS_TRT_VERSION_GE(8, 2, 0, 0)
4358
4359 #if IS_TRT_VERSION_GE(8, 2, 1, 6) || defined(TF_TRT_USE_EFFICIENT_NMS_PLUGIN)
4360
TEST_P(OpConverter_FP32_Test,ConvertCombinedNMS)4361 TEST_P(OpConverter_FP32_Test, ConvertCombinedNMS) {
4362 // Get the NodeDef for CombinedNMS.
4363 auto get_nms_nodedef = [](DataType tf_type, bool clip_boxes = true,
4364 bool pad_per_class = false) -> NodeDef {
4365 Scope s = Scope::NewRootScope();
4366 auto boxes_tensor = ops::Placeholder(s.WithOpName("boxes"), tf_type);
4367 auto scores_tensor = ops::Placeholder(s.WithOpName("scores"), tf_type);
4368 auto max_output_size_per_class =
4369 ops::Placeholder(s.WithOpName("max_output_size_per_class"), DT_INT32);
4370 auto max_total_size =
4371 ops::Placeholder(s.WithOpName("max_total_size"), DT_INT32);
4372 auto iou_threshold =
4373 ops::Placeholder(s.WithOpName("iou_threshold"), tf_type);
4374 auto score_threshold =
4375 ops::Placeholder(s.WithOpName("score_threshold"), tf_type);
4376 auto nms_attrs = ops::CombinedNonMaxSuppression::Attrs()
4377 .PadPerClass(pad_per_class)
4378 .ClipBoxes(clip_boxes);
4379
4380 auto nms_op = ops::CombinedNonMaxSuppression(
4381 s.WithOpName("my_nms"), boxes_tensor, scores_tensor,
4382 max_output_size_per_class, max_total_size, iou_threshold,
4383 score_threshold, nms_attrs);
4384 return nms_op.operation.node()->def();
4385 };
4386
4387 struct TestParams {
4388 const std::string description;
4389 const std::vector<int32> boxes_tensor_dims;
4390 const std::vector<int32> scores_tensor_dims;
4391 const std::vector<float> boxes_values;
4392 const std::vector<float> scores_values;
4393 const int32 max_output_size_per_class;
4394 const int32 max_total_size;
4395 const float iou_threshold;
4396 const float score_threshold;
4397 bool pad_per_class;
4398 bool clip_boxes;
4399 const std::vector<std::vector<int32>> expected_output_dims;
4400 const std::vector<float> exp_boxes;
4401 const std::vector<float> exp_scores;
4402 const std::vector<float> exp_classes;
4403 const std::vector<float> exp_num_detections;
4404 Status conversion_status;
4405 Status runtime_status;
4406 };
4407
4408 Status conv_status =
4409 trt_mode_ == TrtTestMode::kImplicitBatch
4410 ? errors::Unimplemented(
4411 "Implict batch mode not supported with CombinedNMS")
4412 : Status::OK();
4413
4414 std::vector<TestParams> params = {
4415 TestParams{"Test 1: clip boxes",
4416 {1, 1, 3, 4}, // boxes dims
4417 {1, 1, 3}, // scores dims
4418 // boxes values:
4419 {0, 0, 0.3, 1.4, 0, 0, 0.3, 1.4, 0, 0, 0.3, 1.4},
4420 {0.4, 0.7, 0.3}, // scores values
4421 3, // max_output_size_per_class
4422 2, // max_total_size
4423 0.1, // IOU threshold
4424 0, // score_threshold
4425 false, // pad_per_class
4426 true, // clip_boxes
4427 {{1, 2, 4}, // expected_nmsed_boxes_dims
4428 {1, 2}, // expected_nmsed_scores_dims
4429 {1, 2}, // expected_nmsed_classes_dims
4430 {1}}, // expected_valid_detections_dims
4431 // exp_boxes_values:
4432 {0, 0, 0.3, 1.0, 0, 0, 0.3, 1.0},
4433 {0.7, 0.4}, // exp_scores
4434 {1, 0}, // exp_classes
4435 {2}, // exp_num_detections
4436 conv_status},
4437 TestParams{
4438 "Test 2: iou threshold",
4439 {1, 5, 1, 4}, // boxes dims
4440 {1, 5, 1}, // scores dims
4441 // boxes values:
4442 {0, 0, 5, 10, 0, 1, 5, 11, 8, 0, 12, 4, 6, 2, 10, 6, 8, 9, 11, 12},
4443 {5, 4, 3, 2, 1}, // scores values
4444 4, // max_output_size_per_class
4445 4, // max_total_size
4446 0.7, // IOU threshold
4447 0, // score threshold
4448 false, // pad_per_class
4449 false, // clip_boxes
4450 {{1, 4, 4}, // expected nmsed_boxes_dims
4451 {1, 4}, // expected nmsed_scores_dims
4452 {1, 4}, // expected_nmsed_classes_dims
4453 {1}}, // expected_valid_detections_dims
4454 // exp_boxes_values:
4455 {0, 0, 5, 10, 8, 0, 12, 4, 6, 2, 10, 6, 8, 9, 11, 12},
4456 {5, 3, 2, 1}, // exp_scores
4457 {0, 0, 0, 0}, // exp_classes
4458 {4}, // exp_num_detections
4459 conv_status},
4460 TestParams{
4461 "Test 3: score threshold",
4462 {1, 5, 1, 4}, // boxes dims
4463 {1, 5, 1}, // scores dims
4464 // boxes values:
4465 {0, 0, 5, 10, 0, 1, 5, 11, 8, 0, 12, 4, 6, 2, 10, 6, 8, 9, 11, 12},
4466 {5, 4, 3, 2, 1}, // scores values
4467 4, // max_output_size_per_class
4468 4, // max_total_size
4469 0.1, // IOU threshold
4470 2, // score threshold
4471 false, // pad_per_class
4472 false, // clip_boxes
4473 {{1, 4, 4}, // expected nmsed_boxes_dims
4474 {1, 4}, // expected nmsed_scores_dims
4475 {1, 4}, // expected_nmsed_classes_dims
4476 {1}}, // expected_valid_detections_dims
4477 // exp_boxes_values:
4478 {0, 0, 5, 10, 8, 0, 12, 4, 0, 0, 0, 0, 0, 0, 0, 0},
4479 {5, 3, 0, 0}, // exp_scores
4480 {0, 0, 0, 0}, // exp_classes
4481 {2}, // exp_num_detections
4482 conv_status},
4483 TestParams{
4484 "Test 4: per class size and pad",
4485 {1, 5, 1, 4}, // boxes dims
4486 {1, 5, 2}, // scores dims
4487 // boxes values:
4488 {0, 0, 5, 10, 0, 1, 5, 11, 8, 0, 12, 4, 6, 2, 10, 6, 8, 9, 11, 12},
4489 // scores values:
4490 {5, 0, 0, 4, 3, 0, 2, 0, 1, 0},
4491 1, // max_output_size_per_class
4492 4, // max_total_size
4493 0.1, // IOU threshold
4494 0, // score threshold
4495 true, // pad_per_class
4496 false, // clip_boxes
4497 {{1, 2, 4}, // expected nmsed_boxes_dims
4498 {1, 2}, // expected nmsed_scores_dims
4499 {1, 2}, // expected_nmsed_classes_dims
4500 {1}}, // expected_valid_detections_dims
4501 // exp_boxes_values:
4502 {0, 0, 5, 10, 0, 1, 5, 11},
4503 {5, 4}, // exp_scores
4504 {0, 1}, // exp_classes
4505 {2}, // exp_num_detections
4506 conv_status},
4507 TestParams{
4508 "Test 5: different box coordinate order",
4509 {1, 5, 1, 4}, // boxes dims
4510 {1, 5, 2}, // scores dims
4511 // boxes values:
4512 {5, 10, 0, 0, 5, 11, 0, 1, 12, 4, 8, 0, 10, 6, 6, 2, 11, 12, 8, 9},
4513 // scores values:
4514 {5, 0, 0, 4, 3, 0, 2, 0, 1, 0},
4515 1, // max_output_size_per_class
4516 4, // max_total_size
4517 0.1, // IOU threshold
4518 0, // score threshold
4519 true, // pad_per_class
4520 false, // clip_boxes
4521 {{1, 2, 4}, // expected nmsed_boxes_dims
4522 {1, 2}, // expected nmsed_scores_dims
4523 {1, 2}, // expected_nmsed_classes_dims
4524 {1}}, // expected_valid_detections_dims
4525 // exp_boxes_values:
4526 {5, 10, 0, 0, 5, 11, 0, 1},
4527 {5, 4}, // exp_scores
4528 {0, 1}, // exp_classes
4529 {2}, // exp_num_detections
4530 conv_status},
4531 };
4532
4533 for (auto p : params) {
4534 Reset();
4535 SCOPED_TRACE(p.description);
4536 AddTestTensor("boxes", p.boxes_tensor_dims, p.boxes_values);
4537 AddTestTensor("scores", p.scores_tensor_dims, p.scores_values);
4538 AddTestWeights<int32>("max_output_size_per_class", {1},
4539 {p.max_output_size_per_class});
4540 AddTestWeights<int32>("max_total_size", {1}, {p.max_total_size});
4541 AddTestWeights<float>("iou_threshold", {1}, {p.iou_threshold}, tf_type_);
4542 AddTestWeights<float>("score_threshold", {1}, {p.score_threshold},
4543 tf_type_);
4544
4545 auto node_def = get_nms_nodedef(tf_type_, p.clip_boxes, p.pad_per_class);
4546
4547 TestOpConverterMultiOut("my_nms", node_def, p.expected_output_dims,
4548 p.conversion_status, p.runtime_status,
4549 {
4550 ElementsAreArray(p.exp_boxes),
4551 ElementsAreArray(p.exp_scores),
4552 ElementsAreArray(p.exp_classes),
4553 ElementsAreArray(p.exp_num_detections),
4554 },
4555 {tf_type_, tf_type_, tf_type_, DT_INT32});
4556 }
4557 }
4558
4559 #elif IS_TRT_VERSION_GE(7, 1, 3, 0)
4560
TEST_P(OpConverter_FP32_Test,ConvertCombinedNMS)4561 TEST_P(OpConverter_FP32_Test, ConvertCombinedNMS) {
4562 // Get the NodeDef for CombinedNMS.
4563 auto get_nms_nodedef = [](DataType tf_type, bool clip_boxes = true,
4564 bool pad_per_class = false) -> NodeDef {
4565 Scope s = Scope::NewRootScope();
4566 auto boxes_tensor = ops::Placeholder(s.WithOpName("boxes"), tf_type);
4567 auto scores_tensor = ops::Placeholder(s.WithOpName("scores"), tf_type);
4568 auto max_output_size_per_class =
4569 ops::Placeholder(s.WithOpName("max_output_size_per_class"), DT_INT32);
4570 auto max_total_size =
4571 ops::Placeholder(s.WithOpName("max_total_size"), DT_INT32);
4572 auto iou_threshold =
4573 ops::Placeholder(s.WithOpName("iou_threshold"), tf_type);
4574 auto score_threshold =
4575 ops::Placeholder(s.WithOpName("score_threshold"), tf_type);
4576 auto nms_attrs = ops::CombinedNonMaxSuppression::Attrs()
4577 .PadPerClass(pad_per_class)
4578 .ClipBoxes(clip_boxes);
4579
4580 auto nms_op = ops::CombinedNonMaxSuppression(
4581 s.WithOpName("my_nms"), boxes_tensor, scores_tensor,
4582 max_output_size_per_class, max_total_size, iou_threshold,
4583 score_threshold, nms_attrs);
4584 return nms_op.operation.node()->def();
4585 };
4586
4587 struct TestParams {
4588 const std::string description;
4589 const std::vector<int32> boxes_tensor_dims;
4590 const std::vector<int32> scores_tensor_dims;
4591 const std::vector<float> boxes_values;
4592 const std::vector<float> scores_values;
4593 const int32 max_output_size_per_class;
4594 const int32 max_total_size;
4595 const float iou_threshold;
4596 const float score_threshold;
4597 bool pad_per_class;
4598 bool clip_boxes;
4599 const std::vector<std::vector<int32>> expected_output_dims;
4600 const std::vector<float> exp_boxes;
4601 const std::vector<float> exp_scores;
4602 const std::vector<float> exp_classes;
4603 const std::vector<float> exp_num_detections;
4604 Status conversion_status;
4605 Status runtime_status;
4606 };
4607
4608 Status conv_status =
4609 trt_mode_ == TrtTestMode::kDynamicShape
4610 ? errors::Unimplemented(
4611 "TensorRT BatchedNMS Plugin requires input with static shape")
4612 : Status::OK();
4613
4614 std::vector<TestParams> params = {
4615 // TODO(aaroey): there is a bug in TRT's CombinedNonMaxSuppression
4616 // implementation that, the extra output classes that are outside of the
4617 // range specified by valid_detections[i] are not zeros but -1s.
4618 TestParams{
4619 "Test 1: Original test",
4620 {1, 1, 3, 4}, // boxes dims
4621 {1, 1, 3}, // scores dims
4622 {0, 0, 0.3, 0.4, 0, 0, 0.3, 0.4, 0, 0, 0.3, 0.4}, // boxes values
4623 {0.4, 0.7, 0.3}, // scores values
4624 3, // max_output_size_per_class
4625 2, // max_total_size
4626 .5f, // IOU threshold
4627 0, // score_threshold
4628 false, // pad_per_class
4629 true, // clip_boxes
4630 {{1, 2, 4}, // expected_nmsed_boxes_dims
4631 {1, 2}, // expected_nmsed_scores_dims
4632 {1, 2}, // expected_nmsed_classes_dims
4633 {1}}, // expected_valid_detections_dims
4634 {0, 0, 0.3, 0.4, 0, 0, 0.3, 0.4}, // exp_boxes_values
4635 {0.7, 0.4}, // exp_scores
4636 {1, 0}, // exp_classes
4637 {2}, // exp_num_detections
4638 conv_status},
4639 // Test with clip_boxes = False
4640 TestParams{
4641 "Test 2: clip_boxes",
4642 {1, 5, 1, 4}, // boxes dims
4643 {1, 5, 1}, // scores dims
4644 // boxes values:
4645 {0, 0, 5, 10, 0, 4, 5, 14, 8, 0, 12, 4, 6, 2, 10, 6, 8, 9, 11, 12},
4646 {5, 4, 3, 2, 1}, // scores values
4647 4, // max_output_size_per_class
4648 4, // max_total_size
4649 0.1, // IOU threshold
4650 0, // score threshold
4651 false, // pad_per_class
4652 false, // clip_boxes
4653 {{1, 4, 4}, // expected nmsed_boxes_dims
4654 {1, 4}, // expected nmsed_scores_dims
4655 {1, 4}, // expected_nmsed_classes_dims
4656 {1}}, // expected_valid_detections_dims
4657 // exp_boxes_values:
4658 {0, 0, 5, 10, 8, 0, 12, 4, 8, 9, 11, 12, 0, 0, 0, 0},
4659 {5, 3, 1, 0}, // exp_scores
4660 {0, 0, 0, -1}, // exp_classes
4661 {3}, // exp_num_detections
4662 conv_status},
4663 // Test with clip_boxes = False, and nonzero score threshold
4664 TestParams{
4665 "Test 3: score threshold",
4666 {1, 5, 1, 4}, // boxes dims
4667 {1, 5, 1}, // scores dims
4668 // boxes values:
4669 {0, 0, 5, 10, 0, 4, 5, 14, 8, 0, 12, 4, 6, 2, 10, 6, 8, 9, 11, 12},
4670 {5, 4, 3, 2, 1}, // scores values
4671 4, // max_output_size_per_class
4672 4, // max_total_size
4673 0.1, // IOU threshold
4674 2, // score threshold
4675 false, // pad_per_class
4676 false, // clip_boxes
4677 {{1, 4, 4}, // expected nmsed_boxes_dims
4678 {1, 4}, // expected nmsed_scores_dims
4679 {1, 4}, // expected_nmsed_classes_dims
4680 {1}}, // expected_valid_detections_dims
4681 // exp_boxes_values:
4682 {0, 0, 5, 10, 8, 0, 12, 4, 0, 0, 0, 0, 0, 0, 0, 0},
4683 {5, 3, 0, 0}, // exp_scores
4684 {0, 0, -1, -1}, // exp_classes
4685 {2}, // exp_num_detections
4686 conv_status},
4687 // Test where the boxes are defined as with max value first for the box
4688 // coordinates. This test fails before TRT 7.1.3.
4689 TestParams{
4690 "Test 4: max coord first",
4691 {1, 5, 1, 4}, // boxes dims
4692 {1, 5, 1}, // scores dims
4693 // boxes values:
4694 {5, 10, 0, 0, 5, 14, 0, 4, 12, 4, 8, 0, 10, 6, 6, 2, 11, 12, 8, 9},
4695 {5, 4, 3, 2, 1}, // scores values
4696 4, // max_output_size_per_class
4697 4, // max_total_size
4698 0.1, // IOU threshold
4699 0, // score threshold
4700 false, // pad_per_class
4701 false, // clip_boxes
4702 {{1, 4, 4}, // expected nmsed_boxes_dims
4703 {1, 4}, // expected nmsed_scores_dims
4704 {1, 4}, // expected_nmsed_classes_dims
4705 {1}}, // expected_valid_detections_dims
4706 // exp_boxes_values:
4707 {5, 10, 0, 0, 12, 4, 8, 0, 11, 12, 8, 9, 0, 0, 0, 0},
4708 {5, 3, 1, 0}, // exp_scores
4709 {0, 0, 0, -1}, // exp_classes
4710 {3}, // exp_num_detections
4711 conv_status},
4712 TestParams{"Test 5: TopK error",
4713 {1, 5000, 1, 4}, // boxes dims
4714 {1, 5000, 1}, // scores dims
4715 {}, // boxes values:
4716 {}, // scores values
4717 4, // max_output_size_per_class
4718 4, // max_total_size
4719 0.1, // IOU threshold
4720 0, // score threshold
4721 false, // pad_per_class
4722 false, // clip_boxes
4723 {}, // expected_valid_detections_dims
4724 {}, // exp_boxes_values
4725 {}, // exp_scores
4726 {}, // exp_classes
4727 {}, // exp_num_detections
4728 conv_status.ok()
4729 ? errors::InvalidArgument(
4730 "TRT NMS plugin allow top_k<=4096, where top_k = "
4731 "max(num_boxes, max_total_size). You can override "
4732 "this by setting TF_TRT_ALLOW_NMS_TOPK_OVERRIDE=1 "
4733 "environment variable, but this can result in a "
4734 "loss of accuracy.")
4735 : conv_status},
4736 };
4737
4738 for (auto p : params) {
4739 Reset();
4740 SCOPED_TRACE(p.description);
4741 AddTestTensor("boxes", p.boxes_tensor_dims, p.boxes_values);
4742 AddTestTensor("scores", p.scores_tensor_dims, p.scores_values);
4743 AddTestWeights<int32>("max_output_size_per_class", {1},
4744 {p.max_output_size_per_class});
4745 AddTestWeights<int32>("max_total_size", {1}, {p.max_total_size});
4746 AddTestWeights<float>("iou_threshold", {1}, {p.iou_threshold}, tf_type_);
4747 AddTestWeights<float>("score_threshold", {1}, {p.score_threshold},
4748 tf_type_);
4749
4750 auto node_def = get_nms_nodedef(tf_type_, p.clip_boxes, p.pad_per_class);
4751
4752 TestOpConverterMultiOut("my_nms", node_def, p.expected_output_dims,
4753 p.conversion_status, p.runtime_status,
4754 {
4755 ElementsAreArray(p.exp_boxes),
4756 ElementsAreArray(p.exp_scores),
4757 ElementsAreArray(p.exp_classes),
4758 ElementsAreArray(p.exp_num_detections),
4759 },
4760 {tf_type_, tf_type_, tf_type_, DT_INT32});
4761 }
4762 }
4763
4764 #endif // IS_TRT_VERSION_GE(7, 1, 3, 0)
4765
4766 template <typename T>
CreateUnaryOp(DataType tf_type)4767 NodeDef CreateUnaryOp(DataType tf_type) {
4768 Scope s = Scope::NewRootScope();
4769 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
4770 return T(s.WithOpName("my_unary"), input).operation.node()->def();
4771 }
4772
4773 constexpr float kLeakyReluAlpha = 0.2f;
4774 template <>
CreateUnaryOp(DataType tf_type)4775 NodeDef CreateUnaryOp<ops::internal::LeakyRelu>(DataType tf_type) {
4776 Scope s = Scope::NewRootScope();
4777 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
4778 return ops::internal::LeakyRelu(
4779 s.WithOpName("my_unary"), input,
4780 ops::internal::LeakyRelu::Alpha(kLeakyReluAlpha))
4781 .operation.node()
4782 ->def();
4783 }
4784
TEST_P(OpConverter_FP32_UnaryTest,ConvertActivation)4785 TEST_P(OpConverter_FP32_UnaryTest, ConvertActivation) {
4786 constexpr float kSeluAlpha = 1.7580993408473768599402175208123f;
4787 constexpr float kSeluScale = 1.0507009873554804934193349852946f;
4788 using OpFunc = std::function<NodeDef(DataType)>;
4789 using ValFunc = float (*)(float);
4790 std::map<std::string, std::pair<OpFunc, ValFunc>> op_map;
4791
4792 #define ADD_OP(name, op, compute) \
4793 op_map[name] = std::make_pair(CreateUnaryOp<op>, compute)
4794 ADD_OP("LeakyRelu", ops::internal::LeakyRelu,
4795 [](float x) { return (x > 0.0f) ? x : x * kLeakyReluAlpha; });
4796 ADD_OP("Relu", ops::Relu, [](float x) { return (x > 0.0f) ? x : 0.0f; });
4797 ADD_OP("Relu6", ops::Relu6,
4798 [](float x) { return std::min(std::max(x, 0.0f), 6.0f); });
4799 ADD_OP("Sigmoid", ops::Sigmoid,
4800 [](float x) { return 1.0f / (1.0f + std::exp(-x)); });
4801 ADD_OP("Tanh", ops::Tanh, static_cast<ValFunc>(std::tanh));
4802 ADD_OP("Elu", ops::Elu,
4803 [](float x) { return (x > 0.0f) ? x : std::exp(x) - 1; });
4804 ADD_OP("Selu", ops::Selu, [](float x) {
4805 return (x > 0.0f) ? kSeluScale * x
4806 : kSeluScale * kSeluAlpha * (std::exp(x) - 1);
4807 });
4808 ADD_OP("Softsign", ops::Softsign,
4809 [](float x) { return x / (std::abs(x) + 1); });
4810 ADD_OP("Softplus", ops::Softplus,
4811 [](float x) { return std::log(std::exp(x) + 1); });
4812 #undef ADD_OP
4813
4814 // std::exp in Softplus will overflow for input > 88
4815 const std::vector<float> input = {-100, -2, -1, 0, 1, 88};
4816 const bool nan_sensitive = false;
4817
4818 #if IS_TRT_VERSION_GE(8, 0, 0, 0)
4819 // NVBug # 3322482 - Known bug with TRT 8.0 on specific GPU architectures
4820 const float max_abs_error = 1e-4;
4821 #else
4822 const float max_abs_error = 0.;
4823 #endif
4824 RunTests("Activation", *ActivationTypeMap(), op_map, input, "input",
4825 max_abs_error, nan_sensitive);
4826 }
4827
TEST_P(OpConverter_FP32_Test,ConvertExpandDims)4828 TEST_P(OpConverter_FP32_Test, ConvertExpandDims) {
4829 // Get the NodeDef for ExpandDims.
4830 Scope s = Scope::NewRootScope();
4831 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
4832 auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
4833 auto expanddims =
4834 ops::ExpandDims(s.WithOpName("my_expanddims"), input, weights);
4835 const NodeDef& node_def = expanddims.operation.node()->def();
4836 {
4837 // Input is weights, should fail.
4838 Reset();
4839 AddTestWeights<int32>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
4840 AddTestWeights<int32>("weights", {1}, {1});
4841 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4842 "The input \"input\" for ExpandDims must be a "
4843 "tensor");
4844 }
4845 {
4846 // Axis is a tensor, should fail.
4847 Reset();
4848 AddTestTensor("input", {3, 2, 1});
4849 AddTestTensor("weights", {3});
4850 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
4851 "The input \"axis\" for ExpandDims must be a "
4852 "constant");
4853 }
4854 std::vector<TestParamBase> test_params = {
4855 TestParamBase{{1, 1, 2, 3},
4856 {},
4857 {1, 1, 1, 2, 3},
4858 {0},
4859 trt_mode_ == TrtTestMode::kImplicitBatch
4860 ? Status(error::UNIMPLEMENTED,
4861 "TensorRT does not allow manipulation of the "
4862 "batch dimension")
4863 : Status::OK()},
4864 TestParamBase{{1, 1, 2, 3},
4865 {},
4866 {1, 1, 1, 2, 3},
4867 {-5},
4868 trt_mode_ == TrtTestMode::kImplicitBatch
4869 ? Status(error::UNIMPLEMENTED,
4870 "TensorRT does not allow manipulation of the "
4871 "batch dimension")
4872 : Status::OK()},
4873 TestParamBase{{1, 1, 2, 3},
4874 {},
4875 {},
4876 {5},
4877 Status(error::INVALID_ARGUMENT,
4878 "Axis value of 5 is out of bounds, must be in range"
4879 " [-5, 5)")},
4880 TestParamBase{{1, 1, 2, 3},
4881 {},
4882 {},
4883 {-6},
4884 Status(error::INVALID_ARGUMENT,
4885 "Axis value of -6 is out of bounds, must be in range"
4886 " [-5, 5)")},
4887 TestParamBase{{1, 2, 3}, {}, {1, 1, 2, 3}, {1}},
4888 TestParamBase{{1, 2, 3}, {}, {1, 1, 2, 3}, {-3}},
4889 TestParamBase{{1, 2, 3}, {}, {1, 2, 3, 1}, {3}},
4890 TestParamBase{{1, 2, 3}, {}, {1, 2, 3, 1}, {-1}},
4891 TestParamBase{{1, 2, 3}, {}, {1, 2, 1, 3}, {2}},
4892 TestParamBase{{1, 2, 3}, {}, {1, 2, 1, 3}, {-2}},
4893 TestParamBase{{1, 6}, {}, {1, 1, 6}, {1}},
4894 TestParamBase{{1, 6}, {}, {1, 6, 1}, {-1}},
4895 };
4896 for (auto p : test_params) {
4897 Reset();
4898 AddTestTensor("input", p.input_dims, {1, 2, 3, 4, 5, 6});
4899 AddTestWeights<int32>("weights", {1}, {p.param[0]});
4900 TestOpConverter("my_expanddims", node_def, p.expected_output_dims, p.status,
4901 p.runtime_status, ElementsAreArray({1, 2, 3, 4, 5, 6}));
4902 }
4903 }
4904
TEST_P(OpConverter_FP32_FP16_Test,ConvertSoftmax)4905 TEST_P(OpConverter_FP32_FP16_Test, ConvertSoftmax) {
4906 // Get the NodeDef for SoftMax.
4907 Scope s = Scope::NewRootScope();
4908 auto input = ops::Placeholder(s.WithOpName("logits"), tf_type_);
4909 auto softmax = ops::Softmax(s.WithOpName("my_softmax"), input);
4910 const NodeDef& node_def = softmax.operation.node()->def();
4911
4912 struct TestParams {
4913 std::vector<int> input_dims;
4914 std::vector<float> expected_values;
4915 };
4916 std::vector<TestParams> test_params = {
4917 TestParams{/*input_dims=*/{2, 3},
4918 /*expected_values=*/{0.09003057, 0.24472848, 0.66524094,
4919 0.09003057, 0.24472848, 0.66524094}},
4920 TestParams{/*input_dims=*/{6, 1},
4921 /*expected_values=*/{1, 1, 1, 1, 1, 1}}, // works w/ std input
4922 TestParams{/*input_dims=*/{1, 6}, // this works w/ arange(1,7) input
4923 /*expected_values=*/{0.00426978, 0.01160646, 0.03154963,
4924 0.08576079, 0.23312202, 0.6336913}}};
4925 std::vector<float> input_values{1, 2, 3, 4, 5, 6};
4926 for (auto p : test_params) {
4927 Reset();
4928 AddTestTensor("logits", p.input_dims, input_values);
4929 TestOpConverter("my_softmax", node_def, p.input_dims, Status::OK(),
4930 Status::OK(), ArrayFloatNear(p.expected_values, 1e-3));
4931 }
4932 }
4933
TEST_P(OpConverter_FP32_FP16_Test,ConvertLogSoftmax)4934 TEST_P(OpConverter_FP32_FP16_Test, ConvertLogSoftmax) {
4935 // Get the NodeDef for LogSoftMax.
4936 Scope s = Scope::NewRootScope();
4937 auto input = ops::Placeholder(s.WithOpName("logits"), tf_type_);
4938 auto logsoftmax = ops::LogSoftmax(s.WithOpName("my_logsoftmax"), input);
4939 const NodeDef& node_def = logsoftmax.operation.node()->def();
4940
4941 struct TestParams {
4942 std::vector<int> input_dims;
4943 std::vector<float> expected_values;
4944 };
4945
4946 std::vector<TestParams> test_params = {
4947 TestParams{/*input_dims=*/{2, 3},
4948 /*expected_values=*/{-2.4076061, -1.407606, -0.40760604,
4949 -2.4076061, -1.407606, -0.40760604}},
4950 TestParams{/*input_dims=*/{1, 6},
4951 /*expected_values=*/{-5.4561934, -4.4561934, -3.4561934,
4952 -2.4561934, -1.4561933, -0.45619333}},
4953 TestParams{/*input_dims=*/{6, 1},
4954 /*expected_values=*/{0, 0, 0, 0, 0, 0}}};
4955 std::vector<float> input_values{1, 2, 3, 4, 5, 6};
4956 for (auto p : test_params) {
4957 Reset();
4958 AddTestTensor("logits", p.input_dims, input_values);
4959 TestOpConverter("my_logsoftmax", node_def, p.input_dims, Status::OK(),
4960 Status::OK(), ArrayFloatNear(p.expected_values, 1e-3));
4961 }
4962 }
4963
TEST_P(OpConverter_FP32_Test,ConvertSqueeze)4964 TEST_P(OpConverter_FP32_Test, ConvertSqueeze) {
4965 const bool use_implicit_batch = (trt_mode_ == TrtTestMode::kImplicitBatch);
4966 // Get the NodeDef for Squeeze.
4967 auto get_squeeze_nodedef = [](std::vector<int> axes,
4968 DataType tf_type) -> NodeDef {
4969 Scope s = Scope::NewRootScope();
4970 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
4971 if (!axes.empty()) {
4972 ops::Squeeze::Attrs squeeze_attrs;
4973 squeeze_attrs.axis_ = gtl::ArraySlice<int>(axes); // non-absl ok
4974 auto squeeze =
4975 ops::Squeeze(s.WithOpName("my_squeeze"), input, squeeze_attrs);
4976 return squeeze.operation.node()->def();
4977 } else {
4978 auto squeeze = ops::Squeeze(s.WithOpName("my_squeeze"), input);
4979 return squeeze.operation.node()->def();
4980 }
4981 };
4982 std::vector<TestParamBase> test_params = {
4983 TestParamBase{
4984 {1, 2, 1, 3}, // input dims
4985 {}, // input partial dims
4986 {2, 3}, // expected output dims
4987 {}, // axis
4988 trt_mode_ == TrtTestMode::kExplicitBatch
4989 ? Status::OK()
4990 : Status{error::UNIMPLEMENTED,
4991 "Squeeze is not implemented for empty squeeze_dims"}},
4992 TestParamBase{{1, 2, 1, 3},
4993 {},
4994 {2, 1, 3},
4995 {0},
4996 use_implicit_batch
4997 ? Status{error::UNIMPLEMENTED,
4998 "TensorRT does not allow manipulation of the "
4999 "batch dimension"}
5000 : Status::OK()},
5001 TestParamBase{{1, 2, 1, 3},
5002 {},
5003 {2, 1, 3},
5004 {-4},
5005 use_implicit_batch
5006 ? Status{error::UNIMPLEMENTED,
5007 "TensorRT does not allow manipulation of the "
5008 "batch dimension"}
5009 : Status::OK()},
5010 TestParamBase{
5011 {1, 1, 2, 3},
5012 {},
5013 {},
5014 {4},
5015 Status{error::INVALID_ARGUMENT,
5016 "Axis value of 4 is out of bounds, must be in range [-4, 4)"}},
5017 TestParamBase{
5018 {1, 1, 2, 3},
5019 {},
5020 {},
5021 {-5},
5022 Status{
5023 error::INVALID_ARGUMENT,
5024 "Axis value of -5 is out of bounds, must be in range [-4, 4)"}},
5025 TestParamBase{{1, 1, 2, 3}, {}, {1, 2, 3}, {1}},
5026 TestParamBase{{1, 1, 2, 3}, {}, {1, 2, 3}, {-3}},
5027 TestParamBase{{1, 2, 3, 1}, {}, {1, 2, 3}, {3}},
5028 TestParamBase{{1, 2, 3, 1}, {}, {1, 2, 3}, {-1}},
5029 TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {1, 3, 5}},
5030 TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {3, 1, 5}},
5031 TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {-1, -3, -5}},
5032 TestParamBase{{1, 1, 2, 1, 3, 1}, {}, {1, 2, 3}, {1, -3, 5}},
5033 TestParamBase{{1, 1, 6}, {}, {1, 6}, {1}},
5034 TestParamBase{{1, 6, 1}, {}, {1, 6}, {2}},
5035 };
5036 auto squeeze_non_singleton = TestParamBase{
5037 {1, 1, 2, 3},
5038 {},
5039 {},
5040 {2},
5041 Status{error::INVALID_ARGUMENT,
5042 "Dimension 2 with size 2 cannot be squeezed because it must be "
5043 "size 1"}};
5044
5045 if (trt_mode_ == TrtTestMode::kDynamicShape) {
5046 // In this test we try to squeeze axis=2 which has size > 1. In dynamic
5047 // shape mode the converter sees only -1, so it cannot catch this error.
5048 squeeze_non_singleton.status = Status::OK(); // conversion status
5049 squeeze_non_singleton.runtime_status =
5050 errors::InvalidArgument("Negative number of dimensions -1");
5051 // Dynamic shape tests with partially known input shape
5052 test_params.push_back(TestParamBase{{2, 1, 3}, {2, -1, 3}, {2, 3}, {1}});
5053 test_params.push_back(TestParamBase{{2, 1, 3}, {2, 1, -1}, {2, 3}, {1}});
5054 }
5055 test_params.push_back(squeeze_non_singleton);
5056
5057 for (TestParamBase p : test_params) {
5058 SCOPED_TRACE(p);
5059 Reset();
5060 NodeDef node_def = get_squeeze_nodedef(p.param, tf_type_);
5061 AddTestTensor("input", p.input_dims, {1, 2, 3, 4, 5, 6},
5062 p.partial_input_dims);
5063 TestOpConverter("my_squeeze", node_def, p.expected_output_dims, p.status,
5064 p.runtime_status, ElementsAreArray({1, 2, 3, 4, 5, 6}));
5065 }
5066 }
5067
TEST_P(OpConverter_FP32_FP16_INT32_Test,ConvertStridedSlice)5068 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertStridedSlice) {
5069 // Get nodedef for StridedSlice layer.
5070 auto get_strided_slice_nodedef =
5071 [](DataType tf_type, int64 begin_mask = 0, int64 end_mask = 0,
5072 int64 ellipsis_mask = 0, int64 new_axis_mask = 0,
5073 int64 shrink_axis_mask = 0) -> NodeDef {
5074 Scope s = Scope::NewRootScope();
5075 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
5076 auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32);
5077 auto end = ops::Placeholder(s.WithOpName("end"), DT_INT32);
5078 auto strides = ops::Placeholder(s.WithOpName("strides"), DT_INT32);
5079 ops::StridedSlice::Attrs attrs = ops::StridedSlice::Attrs()
5080 .BeginMask(begin_mask)
5081 .EndMask(end_mask)
5082 .EllipsisMask(ellipsis_mask)
5083 .NewAxisMask(new_axis_mask)
5084 .ShrinkAxisMask(shrink_axis_mask);
5085 auto strided_slice = ops::StridedSlice(s.WithOpName("my_strided_slice"),
5086 input, begin, end, strides, attrs);
5087 return strided_slice.operation.node()->def();
5088 };
5089
5090 {
5091 // Input is weights, should fail.
5092 Reset();
5093 NodeDef node_def = get_strided_slice_nodedef(tf_type_);
5094 AddTestWeights<int32>("input", {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6});
5095 AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
5096 AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
5097 AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
5098 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
5099 "The input \"input\" for StridedSlice must "
5100 "be a tensor");
5101 }
5102 {
5103 // Begin, end, strides are tensors, should fail.
5104 Reset();
5105 NodeDef node_def = get_strided_slice_nodedef(tf_type_);
5106 AddTestTensor("input", {4, 1, 1, 1});
5107 AddTestTensor("begin", {4});
5108 AddTestTensor("end", {4});
5109 AddTestTensor("strides", {4});
5110 RunValidationAndConversion(
5111 node_def, error::UNIMPLEMENTED,
5112 "The input \"begin\" for StridedSlice must be a constant");
5113 }
5114
5115 struct TestParams {
5116 std::vector<int> input_dims;
5117 std::vector<int> begin;
5118 std::vector<int> end;
5119 std::vector<int> strides;
5120 int begin_mask;
5121 int end_mask;
5122 int ellipsis_mask;
5123 int new_axis_mask;
5124 int shrink_axis_mask;
5125 std::vector<int> expected_output_dims;
5126 std::vector<float> expected_output;
5127 Status conversion_status;
5128 Status runtime_status;
5129 std::vector<int> partial_input_dims;
5130 };
5131
5132 auto get_mask = [](const std::vector<int>& mask) {
5133 int result = 0;
5134 for (int i = 0; i < mask.size(); i++) {
5135 if (mask[i]) result += (1 << i);
5136 }
5137 return result;
5138 };
5139
5140 // Same input is used for all tests.
5141 const std::vector<float> ok_input = {1, 2, 3, 4, 5, 6};
5142
5143 Status modified_batch_dim_status =
5144 (trt_mode_ == TrtTestMode::kImplicitBatch)
5145 ? errors::Unimplemented(
5146 "TensorRT does not allow modifications to "
5147 "the batch dimension")
5148 : Status::OK();
5149 std::vector<TestParams> params = {
5150 // Modify batch dim, should fail in implicit batch mode.
5151 TestParams{/*input_dims=*/{2, 1, 1, 3},
5152 /*begin=*/{0, 0, 0, 0},
5153 /*end=*/{1, 1, 1, 2},
5154 /*strides=*/{1, 1, 1, 1},
5155 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5156 /*end_mask=*/get_mask({0, 0, 0, 0}),
5157 /*ellipsis_mask=*/0,
5158 /*new_axis_mask=*/0,
5159 /*shrink_axis_mask=*/0,
5160 /*expected_output_dims=*/{1, 1, 1, 2},
5161 /*expected_output=*/{1, 2},
5162 /*conversion_status=*/modified_batch_dim_status,
5163 /*runtime_status=*/Status::OK(),
5164 /*partial_input_dims=*/{}},
5165 // Unknown batch size without end_mask.
5166 TestParams{
5167 /*input_dims=*/{2, 1, 1, 3},
5168 /*begin=*/{0, 0, 0, 0},
5169 /*end=*/{1, 1, 1, 2},
5170 /*strides=*/{1, 1, 1, 1},
5171 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5172 /*end_mask=*/get_mask({0, 0, 0, 0}),
5173 /*ellipsis_mask=*/0,
5174 /*new_axis_mask=*/0,
5175 /*shrink_axis_mask=*/0,
5176 /*expected_output_dims=*/{1, 1, 1, 2},
5177 /*expected_output=*/{1, 2},
5178 modified_batch_dim_status,
5179 Status::OK(),
5180 /*partial_input_dims=*/{-1, 1, 1, 3},
5181 },
5182 // Test Case 2: Unknown batch size with end_mask.
5183 TestParams{
5184 /*input_dims=*/{2, 1, 1, 3},
5185 /*begin=*/{0, 0, 0, 0},
5186 /*end=*/{0, 1, 1, 2},
5187 /*strides=*/{1, 1, 1, 1},
5188 /*begin_mask=*/get_mask({1, 0, 0, 0}),
5189 /*end_mask=*/get_mask({1, 0, 0, 0}),
5190 /*ellipsis_mask=*/0,
5191 /*new_axis_mask=*/0,
5192 /*shrink_axis_mask=*/0,
5193 /*expected_output_dims=*/{2, 1, 1, 2},
5194 /*expected_output=*/{1, 2, 4, 5},
5195 Status::OK(),
5196 Status::OK(),
5197 /*partial_input_dims=*/{-1, 1, 1, 3},
5198 },
5199 // Invalid parameters: end[2] < begin[2]
5200 TestParams{/*input_dims=*/{1, 1, 2, 3},
5201 /*begin=*/{0, 0, 2, 0},
5202 /*end=*/{1, 1, 0, 3},
5203 /*strides=*/{1, 1, 1, 1},
5204 /*begin_mask=*/0,
5205 /*end_mask=*/0,
5206 /*ellipsis_mask=*/0,
5207 /*new_axis_mask=*/0,
5208 /*shrink_axis_mask=*/0,
5209 /*expected_output_dims=*/{},
5210 /*expected_output=*/{},
5211 errors::InvalidArgument("\"size\" cannot be negative for "
5212 "StridedSlice"),
5213 Status::OK(),
5214 /*partial_input_dims=*/{}},
5215 // Slice on the last two dimensions. All dimensions are static.
5216 TestParams{
5217 /*input_dims=*/{1, 1, 2, 3},
5218 /*begin=*/{0, 0, 0, 0},
5219 /*end=*/{0, 0, 1, 2},
5220 /*strides=*/{1, 1, 1, 1},
5221 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5222 /*end_mask=*/get_mask({1, 1, 0, 0}),
5223 /*ellipsis_mask=*/0,
5224 /*new_axis_mask=*/0,
5225 /*shrink_axis_mask=*/0,
5226 /*expected_output_dims=*/{1, 1, 1, 2},
5227 /*expected_output=*/{1, 2},
5228 },
5229 // Slice on the last two dimensions. The slice is fully
5230 // specified for the dynamic dimensions.
5231 TestParams{
5232 /*input_dims=*/{1, 1, 2, 3},
5233 /*begin=*/{0, 0, 0, 0},
5234 /*end=*/{0, 0, 1, 2},
5235 /*strides=*/{1, 1, 1, 1},
5236 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5237 /*end_mask=*/get_mask({1, 1, 0, 0}),
5238 /*ellipsis_mask=*/0,
5239 /*new_axis_mask=*/0,
5240 /*shrink_axis_mask=*/0,
5241 /*expected_output_dims=*/{1, 1, 1, 2},
5242 /*expected_output=*/{1, 2},
5243 Status::OK(),
5244 Status::OK(),
5245 /*partial_input_dims=*/{1, 1, -1, -1},
5246 },
5247 // End mask is provided on all dimensions. This should override the fact
5248 // that the end value is 0. For dynamic shape, it tests
5249 // that we can infer tensor size when "end mask" is provided.
5250 TestParams{
5251 /*input_dims=*/{1, 1, 2, 3},
5252 /*begin=*/{0, 0, 1, 1},
5253 /*end=*/{0, 0, 0, 0},
5254 /*strides=*/{1, 1, 1, 1},
5255 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5256 /*end_mask=*/get_mask({1, 1, 1, 1}),
5257 /*ellipsis_mask=*/0,
5258 /*new_axis_mask=*/0,
5259 /*shrink_axis_mask=*/0,
5260 /*expected_output_dims=*/{1, 1, 1, 2},
5261 /*expected_output=*/{5, 6},
5262 Status::OK(),
5263 Status::OK(),
5264 /*partial_input_dims=*/{1, 1, -1, -1},
5265 },
5266 // End mask is provided for the batch dimension to overwrite the end value
5267 // 0 for that dimension.
5268 TestParams{
5269 /*input_dims=*/{1, 1, 2, 3},
5270 /*begin=*/{0, 0, 1, 1},
5271 /*end=*/{0, 1, 2, 3},
5272 /*strides=*/{1, 1, 1, 1},
5273 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5274 /*end_mask=*/get_mask({1, 1, 0, 0}),
5275 /*ellipsis_mask=*/0,
5276 /*new_axis_mask=*/0,
5277 /*shrink_axis_mask=*/0,
5278 /*expected_output_dims=*/{1, 1, 1, 2},
5279 /*expected_output=*/{5, 6},
5280 },
5281 // Test slice on two dimensions with negative stride, without end_mask set
5282 // on crop dimensions.
5283 TestParams{/*input_dims=*/{1, 1, 2, 3},
5284 /*begin=*/{0, 0, 1, 2},
5285 /*end=*/{0, 0, 0, 0},
5286 /*strides=*/{1, 1, -1, -1},
5287 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5288 /*end_mask=*/get_mask({1, 1, 0, 0}),
5289 /*ellipsis_mask=*/0,
5290 /*new_axis_mask=*/0,
5291 /*shrink_axis_mask=*/0,
5292 /*expected_output_dims=*/{1, 1, 1, 2},
5293 /*expected_output=*/{6, 5},
5294 /*conversion_status=*/Status::OK(),
5295 /*runtime_status=*/Status::OK(),
5296 /*partial_input_dims=*/{1, 1, -1, -1}},
5297 // Test slice on two dimensions with negative stride, with end_mask set on
5298 // crop dimensions. In dynamic shape mode, this tests the runtime size
5299 // computation.
5300 TestParams{/*input_dims=*/{1, 1, 2, 3},
5301 /*begin=*/{0, 0, 1, 1},
5302 /*end=*/{0, 0, 0, 0},
5303 /*strides=*/{1, 1, -1, -1},
5304 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5305 /*end_mask=*/get_mask({1, 1, 1, 1}),
5306 /*ellipsis_mask=*/0,
5307 /*new_axis_mask=*/0,
5308 /*shrink_axis_mask=*/0,
5309 /*expected_output_dims=*/{1, 1, 2, 2},
5310 /*expected_output=*/{5, 4, 2, 1},
5311 /*conversion_status=*/Status::OK(),
5312 /*runtime_status=*/Status::OK(),
5313 /*partial_input_dims=*/{1, 1, -1, -1}},
5314 // Test slice on two dimensions with negative stride, with begin_mask set
5315 // on the crop dimensions. In dynamic shape mode, this tests the runtime
5316 // size computation.
5317 TestParams{/*input_dims=*/{1, 1, 2, 3},
5318 /*begin=*/{0, 0, 0, 0},
5319 /*end=*/{0, 0, 0, 0},
5320 /*strides=*/{1, 1, -1, -1},
5321 /*begin_mask=*/get_mask({0, 0, 1, 1}),
5322 /*end_mask=*/get_mask({1, 1, 0, 0}),
5323 /*ellipsis_mask=*/0,
5324 /*new_axis_mask=*/0,
5325 /*shrink_axis_mask=*/0,
5326 /*expected_output_dims=*/{1, 1, 1, 2},
5327 /*expected_output=*/{6, 5},
5328 /*conversion_status=*/Status::OK(),
5329 /*runtime_status=*/Status::OK(),
5330 /*partial_input_dims=*/{1, 1, -1, -1}},
5331 // Test the reversal of all non-batch dimensions by providing the begin
5332 // masks, end masks, and -1 as strides.
5333 TestParams{/*input_dims=*/{1, 1, 2, 3},
5334 /*begin=*/{0, 0, 0, 0},
5335 /*end=*/{0, 0, 0, 0},
5336 /*strides=*/{1, -1, -1, -1},
5337 /*begin_mask=*/get_mask({1, 1, 1, 1}),
5338 /*end_mask=*/get_mask({1, 1, 1, 1}),
5339 /*ellipsis_mask=*/0,
5340 /*new_axis_mask=*/0,
5341 /*shrink_axis_mask=*/0,
5342 /*expected_output_dims=*/{1, 1, 2, 3},
5343 /*expected_output=*/{6, 5, 4, 3, 2, 1},
5344 /*conversion_status=*/Status::OK(),
5345 /*runtime_status=*/Status::OK(),
5346 /*partial_input_dims=*/{1, -1, -1, -1}},
5347 // Slice on dimensions 1 and 2.
5348 TestParams{
5349 /*input_dims=*/{1, 2, 3, 1},
5350 /*begin=*/{0, 0, 0, 0},
5351 /*end=*/{0, 1, 2, 1},
5352 /*strides=*/{1, 1, 1, 1},
5353 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5354 /*end_mask=*/get_mask({1, 0, 0, 0}),
5355 /*ellipsis_mask=*/0,
5356 /*new_axis_mask=*/0,
5357 /*shrink_axis_mask=*/0,
5358 /*expected_output_dims=*/{1, 1, 2, 1},
5359 /*expected_output=*/{1, 2},
5360 },
5361 // Slice on dimensions 1 and 2.
5362 TestParams{
5363 /*input_dims=*/{1, 2, 3, 1},
5364 /*begin=*/{0, 1, 1, 0},
5365 /*end=*/{0, 2, 3, 1},
5366 /*strides=*/{1, 1, 1, 1},
5367 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5368 /*end_mask=*/get_mask({1, 0, 0, 0}),
5369 /*ellipsis_mask=*/0,
5370 /*new_axis_mask=*/0,
5371 /*shrink_axis_mask=*/0,
5372 /*expected_output_dims=*/{1, 1, 2, 1},
5373 /*expected_output=*/{5, 6},
5374 },
5375 // Slice on dimensions 1 and 3.
5376 TestParams{
5377 /*input_dims=*/{1, 2, 1, 3},
5378 /*begin=*/{0, 0, 0, 0},
5379 /*end=*/{0, 1, 1, 2},
5380 /*strides=*/{1, 1, 1, 1},
5381 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5382 /*end_mask=*/get_mask({1, 0, 0, 0}),
5383 /*ellipsis_mask=*/0,
5384 /*new_axis_mask=*/0,
5385 /*shrink_axis_mask=*/0,
5386 /*expected_output_dims=*/{1, 1, 1, 2},
5387 /*expected_output=*/{1, 2},
5388 },
5389 // Slice on dimensions 1 and 3 with non-zero slice start.
5390 TestParams{
5391 /*input_dims=*/{1, 2, 1, 3},
5392 /*begin=*/{0, 1, 0, 1},
5393 /*end=*/{0, 2, 1, 3},
5394 /*strides=*/{1, 1, 1, 1},
5395 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5396 /*end_mask=*/get_mask({1, 0, 0, 0}),
5397 /*ellipsis_mask=*/0,
5398 /*new_axis_mask=*/0,
5399 /*shrink_axis_mask=*/0,
5400 /*expected_output_dims=*/{1, 1, 1, 2},
5401 /*expected_output=*/{5, 6},
5402 },
5403 // Slice on 3D tensor.
5404 TestParams{
5405 /*input_dims=*/{1, 2, 3},
5406 /*begin=*/{0, 0, 0},
5407 /*end=*/{0, 1, 2},
5408 /*strides=*/{1, 1, 1},
5409 /*begin_mask=*/get_mask({0, 0, 0}),
5410 /*end_mask=*/get_mask({1, 0, 0}),
5411 /*ellipsis_mask=*/0,
5412 /*new_axis_mask=*/0,
5413 /*shrink_axis_mask=*/0,
5414 /*expected_output_dims=*/{1, 1, 2},
5415 /*expected_output=*/{1, 2},
5416 },
5417 // Slice on 3D tensor using end_mask. For dynamic shape, all
5418 // dimensions are dynamic.
5419 TestParams{/*input_dims=*/{1, 2, 3},
5420 /*begin=*/{0, 1, 1},
5421 /*end=*/{0, 0, 0},
5422 /*strides=*/{1, 1, 1},
5423 /*begin_mask=*/get_mask({0, 0, 0}),
5424 /*end_mask=*/get_mask({1, 1, 1}),
5425 /*ellipsis_mask=*/0,
5426 /*new_axis_mask=*/0,
5427 /*shrink_axis_mask=*/0,
5428 /*expected_output_dims=*/{1, 1, 2},
5429 /*expected_output=*/{5, 6},
5430 /*conversion_status=*/Status::OK(),
5431 /*runtime_status=*/Status::OK(),
5432 /*partial_input_dims=*/{-1, -1, -1}},
5433 // Slice on 3D tensor using end_mask. For dynamic shape, all
5434 // dimensions are dynamic.
5435 TestParams{/*input_dims=*/{1, 1, 2, 3},
5436 /*begin=*/{0, 0, 0, 0},
5437 /*end=*/{0, 0, 0, 2},
5438 /*strides=*/{1, 1, 1, 1},
5439 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5440 /*end_mask=*/get_mask({1, 1, 1, 0}),
5441 /*ellipsis_mask=*/0,
5442 /*new_axis_mask=*/0,
5443 /*shrink_axis_mask=*/0,
5444 /*expected_output_dims=*/{1, 1, 2, 2},
5445 /*expected_output=*/{1, 2, 4, 5},
5446 /*conversion_status=*/Status::OK(),
5447 /*runtime_status=*/Status::OK(),
5448 /*partial_input_dims=*/{-1, -1, -1, -1}},
5449 TestParams{
5450 /*input_dims=*/{1, 1, 2, 3},
5451 /*begin=*/{0, 0, 1, 0},
5452 /*end=*/{0, 0, 0, 0},
5453 /*strides=*/{1, 1, 1, 1},
5454 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5455 /*end_mask=*/get_mask({1, 1, 1, 1}),
5456 /*ellipsis_mask=*/0,
5457 /*new_axis_mask=*/0,
5458 /*shrink_axis_mask=*/0,
5459 /*expected_output_dims=*/{1, 1, 1, 3},
5460 /*expected_output=*/{4, 5, 6},
5461 },
5462 // 1D simple slice.
5463 TestParams{/*input_dims=*/{1, 2, 3, 1},
5464 /*begin=*/{0, 0, 0, 0},
5465 /*end=*/{0, 1, 0, 0},
5466 /*strides=*/{1, 1, 1, 1},
5467 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5468 /*end_mask=*/get_mask({1, 0, 1, 1}),
5469 /*ellipsis_mask=*/0,
5470 /*new_axis_mask=*/0,
5471 /*shrink_axis_mask=*/0,
5472 /*expected_output_dims=*/{1, 1, 3, 1},
5473 /*expected_output=*/{1, 2, 3},
5474 /*conversion_status=*/Status::OK(),
5475 /*runtime_status=*/Status::OK(),
5476 /*partial_input_dims=*/{-1, -1, -1, -1}},
5477 TestParams{
5478 /*input_dims=*/{1, 2, 3, 1},
5479 /*begin=*/{0, 1, 0, 0},
5480 /*end=*/{0, 0, 0, 0},
5481 /*strides=*/{1, 1, 1, 1},
5482 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5483 /*end_mask=*/get_mask({1, 1, 1, 1}),
5484 /*ellipsis_mask=*/0,
5485 /*new_axis_mask=*/0,
5486 /*shrink_axis_mask=*/0,
5487 /*expected_output_dims=*/{1, 1, 3, 1},
5488 /*expected_output=*/{4, 5, 6},
5489 },
5490 // Simple 1D slice on 2D input.
5491 TestParams{/*input_dims=*/{1, 6},
5492 /*begin=*/{0, 0},
5493 /*end=*/{0, 3},
5494 /*strides=*/{1, 1},
5495 /*begin_mask=*/get_mask({0, 0}),
5496 /*end_mask=*/get_mask({1, 0}),
5497 /*ellipsis_mask=*/0,
5498 /*new_axis_mask=*/0,
5499 /*shrink_axis_mask=*/0,
5500 /*expected_output_dims=*/{1, 3},
5501 /*expected_output=*/{1, 2, 3},
5502 /*conversion_status=*/Status::OK(),
5503 /*runtime_status=*/Status::OK(),
5504 /*partial_input_dims=*/{-1, -1}},
5505 TestParams{
5506 /*input_dims=*/{1, 1, 6},
5507 /*begin=*/{0, 0, 2},
5508 /*end=*/{0, 0, 5},
5509 /*strides=*/{1, 1, 1},
5510 /*begin_mask=*/get_mask({0, 0, 0}),
5511 /*end_mask=*/get_mask({1, 1, 0}),
5512 /*ellipsis_mask=*/0,
5513 /*new_axis_mask=*/0,
5514 /*shrink_axis_mask=*/0,
5515 /*expected_output_dims=*/{1, 1, 3},
5516 /*expected_output=*/{3, 4, 5},
5517 },
5518 TestParams{
5519 /*input_dims=*/{1, 6, 1},
5520 /*begin=*/{0, 2, 0},
5521 /*end=*/{0, 5, 0},
5522 /*strides=*/{1, 1, 1},
5523 /*begin_mask=*/get_mask({0, 0, 0}),
5524 /*end_mask=*/get_mask({1, 0, 1}),
5525 /*ellipsis_mask=*/0,
5526 /*new_axis_mask=*/0,
5527 /*shrink_axis_mask=*/0,
5528 /*expected_output_dims=*/{1, 3, 1},
5529 /*expected_output=*/{3, 4, 5},
5530 },
5531 // Negative axis.
5532 TestParams{
5533 /*input_dims=*/{1, 6, 1},
5534 /*begin=*/{0, -6, 0},
5535 /*end=*/{0, -3, 0},
5536 /*strides=*/{1, 1, 1},
5537 /*begin_mask=*/get_mask({0, 0, 0}),
5538 /*end_mask=*/get_mask({1, 0, 1}),
5539 /*ellipsis_mask=*/0,
5540 /*new_axis_mask=*/0,
5541 /*shrink_axis_mask=*/0,
5542 /*expected_output_dims=*/{1, 3, 1},
5543 /*expected_output=*/{1, 2, 3},
5544 },
5545 TestParams{
5546 /*input_dims=*/{1, 6, 1},
5547 /*begin=*/{0, 0, 0},
5548 /*end=*/{0, -1, 0},
5549 /*strides=*/{1, 1, 1},
5550 /*begin_mask=*/get_mask({0, 0, 0}),
5551 /*end_mask=*/get_mask({1, 0, 1}),
5552 /*ellipsis_mask=*/0,
5553 /*new_axis_mask=*/0,
5554 /*shrink_axis_mask=*/0,
5555 /*expected_output_dims=*/{1, 5, 1},
5556 /*expected_output=*/{1, 2, 3, 4, 5},
5557 },
5558 // Clamp out of bounds begin and end.
5559 TestParams{
5560 /*input_dims=*/{1, 1, 2, 3},
5561 /*begin=*/{0, 0, -9999, -9},
5562 /*end=*/{0, 1, 1000, 4},
5563 /*strides=*/{1, 1, 1, 1},
5564 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5565 /*end_mask=*/get_mask({1, 0, 0, 0}),
5566 /*ellipsis_mask=*/0,
5567 /*new_axis_mask=*/0,
5568 /*shrink_axis_mask=*/0,
5569 /*expected_output_dims=*/{1, 1, 2, 3},
5570 /*expected_output=*/{1, 2, 3, 4, 5, 6},
5571 },
5572 // Stride values >= 2.
5573 TestParams{/*input_dims=*/{1, 6},
5574 /*begin=*/{0, 0},
5575 /*end=*/{0, 5},
5576 /*strides=*/{1, 2},
5577 /*begin_mask=*/get_mask({0, 0}),
5578 /*end_mask=*/get_mask({1, 0}),
5579 /*ellipsis_mask=*/0,
5580 /*new_axis_mask=*/0,
5581 /*shrink_axis_mask=*/0,
5582 /*expected_output_dims=*/{1, 3},
5583 /*expected_output=*/{1, 3, 5},
5584 /*conversion_status=*/Status::OK(),
5585 /*runtime_status=*/Status::OK(),
5586 /*partial_input_dims=*/{-1, -1}},
5587 TestParams{/*input_dims=*/{1, 6},
5588 /*begin=*/{0, 0},
5589 /*end=*/{0, 6},
5590 /*strides=*/{1, 2},
5591 /*begin_mask=*/get_mask({0, 0}),
5592 /*end_mask=*/get_mask({1, 0}),
5593 /*ellipsis_mask=*/0,
5594 /*new_axis_mask=*/0,
5595 /*shrink_axis_mask=*/0,
5596 /*expected_output_dims=*/{1, 3},
5597 /*expected_output=*/{1, 3, 5},
5598 /*conversion_status=*/Status::OK(),
5599 /*runtime_status=*/Status::OK(),
5600 /*partial_input_dims=*/{-1, -1}},
5601 TestParams{/*input_dims=*/{1, 6},
5602 /*begin=*/{0, 1},
5603 /*end=*/{0, 6},
5604 /*strides=*/{1, 2},
5605 /*begin_mask=*/get_mask({0, 0}),
5606 /*end_mask=*/get_mask({1, 0}),
5607 /*ellipsis_mask=*/0,
5608 /*new_axis_mask=*/0,
5609 /*shrink_axis_mask=*/0,
5610 /*expected_output_dims=*/{1, 3},
5611 /*expected_output=*/{2, 4, 6},
5612 /*conversion_status=*/Status::OK(),
5613 /*runtime_status=*/Status::OK(),
5614 /*partial_input_dims=*/{-1, -1}},
5615 TestParams{/*input_dims=*/{1, 6},
5616 /*begin=*/{0, 2},
5617 /*end=*/{0, 6},
5618 /*strides=*/{1, 3},
5619 /*begin_mask=*/get_mask({0, 0}),
5620 /*end_mask=*/get_mask({1, 0}),
5621 /*ellipsis_mask=*/0,
5622 /*new_axis_mask=*/0,
5623 /*shrink_axis_mask=*/0,
5624 /*expected_output_dims=*/{1, 2},
5625 /*expected_output=*/{3, 6},
5626 /*conversion_status=*/Status::OK(),
5627 /*runtime_status=*/Status::OK(),
5628 /*partial_input_dims=*/{-1, -1}},
5629 // Stride values <= -2.
5630 TestParams{/*input_dims=*/{1, 6},
5631 /*begin=*/{0, 5},
5632 /*end=*/{0, 0},
5633 /*strides=*/{1, -2},
5634 /*begin_mask=*/get_mask({0, 0}),
5635 /*end_mask=*/get_mask({1, 1}),
5636 /*ellipsis_mask=*/0,
5637 /*new_axis_mask=*/0,
5638 /*shrink_axis_mask=*/0,
5639 /*expected_output_dims=*/{1, 3},
5640 /*expected_output=*/{6, 4, 2},
5641 /*conversion_status=*/Status::OK(),
5642 /*runtime_status=*/Status::OK(),
5643 /*partial_input_dims=*/{-1, -1}},
5644 TestParams{/*input_dims=*/{1, 6},
5645 /*begin=*/{0, 5},
5646 /*end=*/{0, 0},
5647 /*strides=*/{1, -2},
5648 /*begin_mask=*/get_mask({0, 0}),
5649 /*end_mask=*/get_mask({1, 0}),
5650 /*ellipsis_mask=*/0,
5651 /*new_axis_mask=*/0,
5652 /*shrink_axis_mask=*/0,
5653 /*expected_output_dims=*/{1, 3},
5654 /*expected_output=*/{6, 4, 2},
5655 /*conversion_status=*/Status::OK(),
5656 /*runtime_status=*/Status::OK(),
5657 /*partial_input_dims=*/{-1, -1}},
5658 TestParams{/*input_dims=*/{1, 6},
5659 /*begin=*/{0, 5},
5660 /*end=*/{0, 1},
5661 /*strides=*/{1, -3},
5662 /*begin_mask=*/get_mask({0, 0}),
5663 /*end_mask=*/get_mask({1, 0}),
5664 /*ellipsis_mask=*/0,
5665 /*new_axis_mask=*/0,
5666 /*shrink_axis_mask=*/0,
5667 /*expected_output_dims=*/{1, 2},
5668 /*expected_output=*/{6, 3},
5669 /*conversion_status=*/Status::OK(),
5670 /*runtime_status=*/Status::OK(),
5671 /*partial_input_dims=*/{-1, -1}},
5672 // Ellipsis_mask causes leading dimensions to be ignored. Begin, end,
5673 // stride, and mask values of size 2 should be interpreted as applying to
5674 // the last 2 dimensions, while the ellipsis applies to the first 2 (for a
5675 // 4D input tensor).
5676 TestParams{/*input_dims=*/{1, 1, 2, 3},
5677 /*begin=*/{0, 1},
5678 /*end=*/{0, 2},
5679 /*strides=*/{1, 1},
5680 /*begin_mask=*/get_mask({0, 0}),
5681 /*end_mask=*/get_mask({0, 0}),
5682 /*ellipsis_mask=*/get_mask({1, 0, 0}),
5683 /*new_axis_mask=*/0,
5684 /*shrink_axis_mask=*/0,
5685 /*expected_output_dims=*/{1, 1, 2, 1},
5686 /*expected_output=*/{2, 5},
5687 /*conversion_status=*/Status::OK(),
5688 /*runtime_status=*/Status::OK(),
5689 /*partial_input_dims=*/{-1, -1, -1, -1}},
5690 // Ellipsis_mask on single inner dimension.
5691 TestParams{
5692 /*input_dims=*/{1, 1, 2, 3},
5693 /*begin=*/{0, 0, 1},
5694 /*end=*/{0, 0, 2},
5695 /*strides=*/{1, 1, 1},
5696 /*begin_mask=*/get_mask({1, 0, 0, 0}),
5697 /*end_mask=*/get_mask({1, 0, 0, 0}),
5698 /*ellipsis_mask=*/get_mask({0, 1, 0, 0}),
5699 /*new_axis_mask=*/0,
5700 /*shrink_axis_mask=*/0,
5701 /*expected_output_dims=*/{1, 1, 2, 1},
5702 /*expected_output=*/{2, 5},
5703 },
5704 // Ellipsis_mask on single leading dimension.
5705 TestParams{/*input_dims=*/{1, 1, 2, 3},
5706 /*begin=*/{0, 0, 0, 1},
5707 /*end=*/{0, 1, 2, 2},
5708 /*strides=*/{1, 1, 1, 1},
5709 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5710 /*end_mask=*/get_mask({0, 0, 0, 0}),
5711 /*ellipsis_mask=*/get_mask({1, 0, 0, 0}),
5712 /*new_axis_mask=*/0,
5713 /*shrink_axis_mask=*/0,
5714 /*expected_output_dims=*/{1, 1, 2, 1},
5715 /*expected_output=*/{2, 5},
5716 /*conversion_status=*/Status::OK(),
5717 /*runtime_status=*/Status::OK(),
5718 /*partial_input_dims=*/{-1, -1, -1, -1}},
5719 // Ellipsis_mask on single inner dimension overrides that dimensions'
5720 // begin/end values.
5721 TestParams{/*input_dims=*/{1, 1, 2, 3},
5722 /*begin=*/{0, 1, 0, 1},
5723 /*end=*/{1, 1, 2, 2},
5724 /*strides=*/{1, 1, 1, 1},
5725 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5726 /*end_mask=*/get_mask({0, 0, 0, 0}),
5727 /*ellipsis_mask=*/get_mask({0, 1, 0, 0}),
5728 /*new_axis_mask=*/0,
5729 /*shrink_axis_mask=*/0,
5730 /*expected_output_dims=*/{1, 1, 2, 1},
5731 /*expected_output=*/{2, 5},
5732 /*conversion_status=*/Status::OK(),
5733 /*runtime_status=*/Status::OK(),
5734 /*partial_input_dims=*/{-1, -1, -1, -1}},
5735 // Ellipsis mask on single leading dimension should throw out extra
5736 // leading values of begin/end vectors so that only the last N-1 values of
5737 // each remain.
5738 TestParams{/*input_dims=*/{1, 1, 2, 3},
5739 /*begin=*/{0, 0, 0, 0, 1},
5740 /*end=*/{0, 1, 1, 2, 2},
5741 /*strides=*/{1, 1, 1, 1, 1},
5742 /*begin_mask=*/get_mask({0, 0, 0, 0}),
5743 /*end_mask=*/get_mask({0, 0, 0, 0}),
5744 /*ellipsis_mask=*/get_mask({1, 0, 0, 0}),
5745 /*new_axis_mask=*/0,
5746 /*shrink_axis_mask=*/0,
5747 /*expected_output_dims=*/{1, 1, 2, 1},
5748 /*expected_output=*/{2, 5},
5749 /*conversion_status=*/Status::OK(),
5750 /*runtime_status=*/Status::OK(),
5751 /*partial_input_dims=*/{-1, -1, -1, -1}},
5752 // Shrink-axis mask set for the final dimension of final size 1 should
5753 // remove that dimension from the final shape.
5754 TestParams{/*input_dims=*/{1, 1, 2, 3},
5755 /*begin=*/{0, 0, 0, 1},
5756 /*end=*/{0, 0, 0, 2},
5757 /*strides=*/{1, 1, 1, 1},
5758 /*begin_mask=*/get_mask({1, 1, 1, 0}),
5759 /*end_mask=*/get_mask({1, 1, 1, 0}),
5760 /*ellipsis_mask=*/0,
5761 /*new_axis_mask=*/0,
5762 /*shrink_axis_mask=*/get_mask({0, 0, 0, 1}),
5763 /*expected_output_dims=*/{1, 1, 2},
5764 /*expected_output=*/{2, 5},
5765 /*conversion_status=*/Status::OK(),
5766 /*runtime_status=*/Status::OK(),
5767 /*partial_input_dims=*/{1, 1, 2, -1}},
5768 // Shrink-axis mask set for multiple dimensions that have a final size of
5769 // 1 should remove those dimensions from the final shape.
5770 TestParams{/*input_dims=*/{1, 1, 2, 3},
5771 /*begin=*/{0, 0, 0, 1},
5772 /*end=*/{0, 1, 2, 2},
5773 /*strides=*/{1, 1, 1, 1},
5774 /*begin_mask=*/get_mask({1, 0, 0, 0}),
5775 /*end_mask=*/get_mask({1, 0, 0, 0}),
5776 /*ellipsis_mask=*/0,
5777 /*new_axis_mask=*/0,
5778 /*shrink_axis_mask=*/get_mask({0, 1, 0, 1}),
5779 /*expected_output_dims=*/{1, 2},
5780 /*expected_output=*/{2, 5},
5781 /*conversion_status=*/Status::OK(),
5782 /*runtime_status=*/Status::OK(),
5783 /*partial_input_dims=*/{1, 1, 2, -1}},
5784 // Shrink-axis mask set for multiple sequential dimensions of final size 1
5785 // should
5786 // remove those dimensions from the final shape.
5787 TestParams{/*input_dims=*/{6, 1, 1},
5788 /*begin=*/{0, 0, 0},
5789 /*end=*/{0, 0, 0},
5790 /*strides=*/{1, 1, 1},
5791 /*begin_mask=*/get_mask({1, 1, 1}),
5792 /*end_mask=*/get_mask({1, 1, 1}),
5793 /*ellipsis_mask=*/0,
5794 /*new_axis_mask=*/0,
5795 /*shrink_axis_mask=*/get_mask({0, 1, 1}),
5796 /*expected_output_dims=*/{6},
5797 /*expected_output=*/{1, 2, 3, 4, 5, 6},
5798 /*conversion_status=*/Status::OK(),
5799 /*runtime_status=*/Status::OK(),
5800 /*partial_input_dims=*/{-1, -1, -1}},
5801 // The new_axis_mask parameter is not supported.
5802 TestParams{/*input_dims=*/{1, 6},
5803 /*begin=*/{0, 0, 0},
5804 /*end=*/{0, 0, 0},
5805 /*strides=*/{1, 1, 1},
5806 /*begin_mask=*/
5807 get_mask({0, 1, 1}),
5808 /*end_mask=*/get_mask({0, 1, 1}),
5809 /*ellipsis_mask=*/0,
5810 /*new_axis_mask=*/get_mask({1, 0, 0}),
5811 /*shrink_axis_mask=*/get_mask({0, 0, 0}),
5812 /*expected_output_dims=*/{1, 1, 6},
5813 /*expected_output=*/{1, 1, 6},
5814 /*conversion_status=*/
5815 errors::Unimplemented(
5816 "new_axis_mask is not supported for StridedSlice"),
5817 /*runtime_status=*/Status::OK(),
5818 /*partial_input_dims=*/{1, 6}},
5819 };
5820
5821 int i = 0;
5822 for (auto p : params) {
5823 Reset();
5824 NodeDef node_def = get_strided_slice_nodedef(
5825 tf_type_, p.begin_mask, p.end_mask, p.ellipsis_mask, p.new_axis_mask,
5826 p.shrink_axis_mask);
5827
5828 VLOG(2) << "Preparing test case " << i++ << " with dims "
5829 << DebugString(p.input_dims);
5830
5831 switch (trt_mode_) {
5832 case TrtTestMode::kImplicitBatch: {
5833 AddTestTensor("input", p.input_dims, ok_input);
5834 break;
5835 }
5836 case TrtTestMode::kExplicitBatch: {
5837 AddTestTensor("input", p.input_dims, ok_input);
5838 break;
5839 }
5840 case TrtTestMode::kDynamicShape: {
5841 if (p.partial_input_dims.size() > 0) {
5842 AddTestTensor("input", p.input_dims, tf_type_, ok_input,
5843 p.partial_input_dims);
5844
5845 } else {
5846 AddTestTensor("input", p.input_dims, tf_type_, ok_input,
5847 p.input_dims);
5848 }
5849 break;
5850 }
5851 }
5852
5853 VLOG(2) << "Adding weights begin: " << DebugString(p.begin)
5854 << ", end: " << DebugString(p.end)
5855 << ", strides: " << DebugString(p.strides);
5856 AddTestWeights<int32>("begin", {static_cast<int>(p.begin.size())}, p.begin);
5857 AddTestWeights<int32>("end", {static_cast<int>(p.end.size())}, p.end);
5858 AddTestWeights<int32>("strides", {static_cast<int>(p.strides.size())},
5859 p.strides);
5860
5861 TestOpConverter("my_strided_slice", node_def, p.expected_output_dims,
5862 p.conversion_status, p.runtime_status,
5863 ElementsAreArray(p.expected_output));
5864 }
5865 }
5866
TEST_P(OpConverter_FP32_FP16_INT32_Test,ConvertSlice)5867 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertSlice) {
5868 // Get nodedef for Slice layer.
5869 auto get_slice_nodedef = [](DataType tf_type) -> NodeDef {
5870 Scope s = Scope::NewRootScope();
5871 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
5872 auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32);
5873 auto size = ops::Placeholder(s.WithOpName("size"), DT_INT32);
5874 auto slice = ops::Slice(s.WithOpName("my_slice"), input, begin, size);
5875 return slice.operation.node()->def();
5876 };
5877
5878 struct TestParams {
5879 std::vector<int> input_dims;
5880 std::vector<int>
5881 partial_input_dims; // Symbolic shape in dynamic shape mode.
5882 std::vector<int> begin;
5883 std::vector<int> size;
5884 std::vector<int> expected_output_dims;
5885 std::vector<int> expected_output;
5886 Status conversion_status;
5887 Status runtime_status;
5888 };
5889
5890 std::vector<TestParams> params = {
5891 // Slice start points must always be >= 0.
5892 TestParams{/*input_dims=*/{1, 1, 2, 3},
5893 /*partial_input_dims=*/{-1, -1, -1, -1},
5894 /*begin=*/{0, 0, -1, 0},
5895 /*size=*/{1, 1, 2, 3},
5896 /*expected_output_dims=*/{},
5897 /*expected_output=*/{},
5898 /*conversion_status=*/
5899 errors::InvalidArgument("\"begin\" in Slice "
5900 "is out of range")},
5901 // In implicit batch mode, slicing the batch dimension is not allowed.
5902 TestParams{/*input_dims=*/{2, 1, 1, 3},
5903 /*partial_input_dims=*/{-1, -1, -1, -1},
5904 /*begin=*/{0, 0, 0, 0},
5905 /*size=*/{1, 1, 1, 3},
5906 /*expected_output_dims=*/{1, 1, 1, 3},
5907 /*expected_output=*/{1, 2, 3},
5908 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
5909 ? errors::Unimplemented(
5910 "TensorRT does not allow modifications to the batch "
5911 "dimension in implicit batch mode")
5912 : Status::OK()},
5913 // Dynamic batch size but using size[0] of -1, ok.
5914 TestParams{{1, 1, 2, 3},
5915 /*partial_input_dims=*/{-1, -1, -1, -1},
5916 {0, 0, 0, 0},
5917 {-1, 1, 2, 2},
5918 {1, 1, 2, 2},
5919 {1, 2, 4, 5},
5920 Status::OK()},
5921 TestParams{{1, 1, 2, 3},
5922 /*partial_input_dims=*/{-1, -1, -1, -1},
5923 {0, 0, 0, 0},
5924 {-1, -1, -1, -1},
5925 {1, 1, 2, 3},
5926 {1, 2, 3, 4, 5, 6},
5927 Status::OK()},
5928 TestParams{{1, 1, 2, 3},
5929 /*partial_input_dims=*/{-1, -1, -1, -1},
5930 {0, 0, 0, 0},
5931 {1, 1, 2, 3},
5932 {1, 1, 2, 3},
5933 {1, 2, 3, 4, 5, 6}},
5934 TestParams{{1, 1, 2, 3},
5935 /*partial_input_dims=*/{-1, -1, -1, -1},
5936 /*begin=*/{0, 0, 0, 0},
5937 /*size=*/{1, -1, 2, 2},
5938 /*expected_output_dims=*/{1, 1, 2, 2},
5939 /*expected_output=*/{1, 2, 4, 5},
5940 Status::OK()},
5941 TestParams{/*input_dims=*/{1, 6},
5942 /*partial_input_dims=*/{-1, -1},
5943 /*being=*/{0, 1},
5944 /*size=*/{1, 5},
5945 /*expected_output_dims=*/{1, 5},
5946 /*expected_output=*/{2, 3, 4, 5, 6}},
5947 TestParams{/*input_dims=*/{1, 6},
5948 /*partial_input_dims=*/{-1, -1},
5949 /*begin=*/{0, 1},
5950 /*size=*/{-1, 3},
5951 /*expected_output_dims=*/{1, 3},
5952 /*expected_output=*/{2, 3, 4}, Status::OK()},
5953 // In dynamic shape mode we do not know the input shape during
5954 // conversion, therfore we cannot check out of bound access.
5955 TestParams{
5956 {1, 1, 2, 3},
5957 /*partial_input_dims=*/{-1, -1, -1, -1},
5958 /*begin=*/{0, 0, 3, 0},
5959 /*end=*/{1, 1, 2, 3},
5960 {},
5961 {},
5962 trt_mode_ == TrtTestMode::kDynamicShape
5963 ? Status::OK()
5964 : errors::InvalidArgument("\"begin\" + \"size\" for dimension "
5965 "2 in Slice is out of range"),
5966 errors::Internal("Internal: Failed to build TensorRT engine")},
5967 // The slice operation should expect that the "size[i]" values are not
5968 // less than -1.
5969 TestParams{/*input_dims=*/{1, 1, 2, 3},
5970 /*partial_input_dims=*/{-1, -1, -1, -1},
5971 /*begin=*/{0, 0, 0, 0},
5972 /*size=*/{1, 1, 2, -2},
5973 {},
5974 {},
5975 errors::InvalidArgument("\"size\" in Slice is out of range")},
5976 TestParams{
5977 /*input_dims=*/{1, 1, 2, 3},
5978 /*partial_input_dims=*/{-1, -1, -1, -1},
5979 /*begin=*/{0, 0, 0, 0},
5980 /*size=*/{1, 1, 3, 2},
5981 /*expected_output_dims=*/{},
5982 /*expected_output=*/{},
5983 /*conversion_status=*/trt_mode_ == TrtTestMode::kDynamicShape
5984 ? Status::OK()
5985 : errors::InvalidArgument("\"begin\" + \"size\" for dimension "
5986 "2 in Slice is out of range"),
5987 errors::Internal("Internal: Failed to build TensorRT engine")},
5988 };
5989
5990 logger_.unsuppressAllLoggerMsgs();
5991 int i = 0;
5992 for (auto p : params) {
5993 Reset();
5994 NodeDef node_def = get_slice_nodedef(tf_type_);
5995
5996 VLOG(2) << "Preparing test case " << i++ << " with dims "
5997 << DebugString(p.input_dims);
5998
5999 // The input tensor always has size 6.
6000 std::vector<int> input_vals = {1, 2, 3, 4, 5, 6};
6001
6002 switch (trt_mode_) {
6003 case TrtTestMode::kImplicitBatch: {
6004 AddTestTensor("input", p.input_dims, input_vals);
6005 break;
6006 }
6007 case TrtTestMode::kExplicitBatch: {
6008 AddTestTensor("input", p.input_dims, input_vals);
6009 break;
6010 }
6011 case TrtTestMode::kDynamicShape: {
6012 if (p.partial_input_dims.size() > 0) {
6013 AddTestTensor("input", p.input_dims, tf_type_, input_vals,
6014 p.partial_input_dims);
6015
6016 } else {
6017 AddTestTensor("input", p.input_dims, tf_type_, input_vals,
6018 p.input_dims);
6019 }
6020 break;
6021 }
6022 }
6023
6024 AddTestWeights<int32>("begin", {static_cast<int>(p.begin.size())}, p.begin);
6025 AddTestWeights<int32>("size", {static_cast<int>(p.size.size())}, p.size);
6026
6027 const bool flag =
6028 trt_mode_ == TrtTestMode::kDynamicShape && (i == 9 || i == 11);
6029 if (flag) logger_.suppressLoggerMsgs(nvinfer1::ILogger::Severity::kERROR);
6030
6031 TestOpConverter("my_slice", node_def, p.expected_output_dims,
6032 p.conversion_status, p.runtime_status,
6033 ElementsAreArray(p.expected_output));
6034 if (flag) logger_.unsuppressLoggerMsgs(nvinfer1::ILogger::Severity::kERROR);
6035 }
6036 }
6037
TEST_P(OpConverter_FP32_Test,ConvertConv2D)6038 TEST_P(OpConverter_FP32_Test, ConvertConv2D) {
6039 // Get nodedef for Conv2D layer.
6040 DataType tf_type = tf_type_;
6041 auto get_conv2d_nodedef =
6042 [tf_type](std::vector<int> strides = {1, 1, 1, 1},
6043 string padding = "SAME", string data_format = "NCHW",
6044 std::vector<int> dilations = {1, 1, 1, 1}) -> NodeDef {
6045 Scope s = Scope::NewRootScope();
6046 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
6047 auto filter = ops::Placeholder(s.WithOpName("weights"), tf_type);
6048 ops::Conv2D::Attrs attrs =
6049 ops::Conv2D::Attrs().DataFormat(data_format).Dilations(dilations);
6050 auto conv2d = ops::Conv2D(s.WithOpName("my_conv2d"), input, filter, strides,
6051 padding, attrs);
6052 return conv2d.operation.node()->def();
6053 };
6054
6055 {
6056 // Input is weights, should fail.
6057 Reset();
6058 NodeDef node_def = get_conv2d_nodedef();
6059 AddTestWeights<float>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
6060 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
6061 RunValidationAndConversion(
6062 node_def, error::UNIMPLEMENTED,
6063 "The input \"input\" for Conv2D must be a tensor");
6064 }
6065 {
6066 // Filter is tensor, should fail.
6067 Reset();
6068 NodeDef node_def = get_conv2d_nodedef();
6069 AddTestTensor("input", {3, 1, 2, 1});
6070 AddTestTensor("weights", {3, 3, 1, 1});
6071 RunValidationAndConversion(
6072 node_def, error::UNIMPLEMENTED,
6073 "The input \"filter\" for Conv2D must be a constant");
6074 }
6075 {
6076 // Filter is not 4D, should fail.
6077 Reset();
6078 NodeDef node_def = get_conv2d_nodedef();
6079 AddTestTensor("input", {1, 1, 2, 3});
6080 AddTestWeights<float>("weights", {3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
6081 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
6082 "Conv2D expects kernel of dimension 4");
6083 }
6084 {
6085 // Dilations is not 4D, should fail.
6086 Reset();
6087 NodeDef node_def =
6088 get_conv2d_nodedef({1, 1, 1, 1}, "SAME", "NCHW", {1, 1, 1});
6089 AddTestTensor("input", {1, 1, 2, 3});
6090 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
6091 RunValidationAndConversion(
6092 node_def, error::INVALID_ARGUMENT,
6093 "Convolution dilations field must specify 4 dimensions");
6094 }
6095 {
6096 // Dilation value is not 1 for channel, should fail.
6097 Reset();
6098 NodeDef node_def =
6099 get_conv2d_nodedef({1, 1, 1, 1}, "SAME", "NCHW", {1, 2, 1, 1});
6100 AddTestTensor("input", {1, 1, 2, 3});
6101 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
6102 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6103 "Dilation rate must be 1 for batch and channel "
6104 "dimensions");
6105 }
6106 {
6107 // Dilation value is not 1 for channel (NHWC), should fail.
6108 Reset();
6109 NodeDef node_def =
6110 get_conv2d_nodedef({1, 1, 1, 1}, "SAME", "NHWC", {1, 1, 1, 2});
6111 AddTestTensor("input", {1, 2, 3, 1});
6112 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
6113 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6114 "Dilation rate must be 1 for batch and channel "
6115 "dimensions");
6116 }
6117 {
6118 // Strides is not 4D, should fail.
6119 Reset();
6120 NodeDef node_def =
6121 get_conv2d_nodedef({1, 1, 1}, "SAME", "NCHW", {1, 1, 1, 1});
6122 AddTestTensor("input", {1, 1, 2, 3});
6123 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
6124 RunValidationAndConversion(
6125 node_def, error::INVALID_ARGUMENT,
6126 "Convolution strides field must specify 4 dimensions");
6127 }
6128 {
6129 // Stride value is not 1 for channel, should fail.
6130 Reset();
6131 NodeDef node_def =
6132 get_conv2d_nodedef({1, 2, 1, 1}, "SAME", "NCHW", {1, 1, 1, 1});
6133 AddTestTensor("input", {1, 1, 2, 3});
6134 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
6135 RunValidationAndConversion(
6136 node_def, error::UNIMPLEMENTED,
6137 "Stride must be 1 for batch and channel dimensions");
6138 }
6139 if (trt_mode_ == TrtTestMode::kDynamicShape) {
6140 Reset();
6141 NodeDef node_def = get_conv2d_nodedef();
6142 // Channel dim unknown, should fail.
6143 nvinfer1::DataType trt_type;
6144 TF_ASSERT_OK(TfTypeToTrtType(tf_type_, &trt_type));
6145 AddTestTensorWithTFDims("input", {-1, -1, -1, -1}, trt_type);
6146 AddTestWeights<float>("weights", {1, 2, 1, 1}, {-1, 1});
6147 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
6148 "Channel dimension must be static");
6149 }
6150
6151 struct TestParams {
6152 std::vector<int> input_dims;
6153 std::vector<float> input;
6154 std::vector<int> filter_dims;
6155 std::vector<float> filter;
6156 std::vector<int> strides;
6157 string padding;
6158 string data_format;
6159 std::vector<int> dilations;
6160 std::vector<int> expected_output_dims;
6161 std::vector<float> expected_output;
6162 };
6163
6164 // Ok.
6165 std::vector<TestParams> ok_params = {
6166 // Basic
6167 TestParams{/*input_dims=*/{1, 1, 2, 3},
6168 /*input=*/{0, 1, 2, 3, 3, 4},
6169 /*filter_dims=*/{1, 2, 1, 1},
6170 /*filter=*/{-1, 1},
6171 /*strides=*/{1, 1, 1, 1},
6172 /*padding=*/"VALID",
6173 /*data_format=*/"NCHW",
6174 /*dilations=*/{1, 1, 1, 1},
6175 /*expected_output_dims=*/{1, 1, 2, 2},
6176 /*expected_output=*/{1, 1, 0, 1}},
6177 // SAME padding (Asymmetric)
6178 TestParams{/*input_dims=*/{1, 1, 2, 3},
6179 /*input=*/{0, 1, 2, 3, 3, 4},
6180 /*filter_dims=*/{1, 2, 1, 1},
6181 /*filter=*/{-1, 1},
6182 /*strides=*/{1, 1, 1, 1},
6183 /*padding=*/"SAME",
6184 /*data_format=*/"NCHW",
6185 /*dilations=*/{1, 1, 1, 1},
6186 /*expected_output_dims=*/{1, 1, 2, 3},
6187 /*expected_output=*/{1, 1, -2, 0, 1, -4}},
6188 // SAME padding (Symmetric)
6189 TestParams{/*input_dims=*/{1, 1, 2, 3},
6190 /*input=*/{0, 1, 2, 3, 3, 4},
6191 /*filter_dims=*/{1, 3, 1, 1},
6192 /*filter=*/{-1, 0, 1},
6193 /*strides=*/{1, 1, 1, 1},
6194 /*padding=*/"SAME",
6195 /*data_format=*/"NCHW",
6196 /*dilations=*/{1, 1, 1, 1},
6197 /*expected_output_dims=*/{1, 1, 2, 3},
6198 /*expected_output=*/{1, 2, -1, 3, 1, -3}},
6199 // NHWC
6200 TestParams{/*input_dims=*/{1, 2, 3, 1},
6201 /*input=*/{0, 1, 2, 3, 3, 4},
6202 /*filter_dims=*/{1, 2, 1, 1},
6203 /*filter=*/{-1, 1},
6204 /*strides=*/{1, 1, 1, 1},
6205 /*padding=*/"VALID",
6206 /*data_format=*/"NHWC",
6207 /*dilations=*/{1, 1, 1, 1},
6208 /*expected_output_dims=*/{1, 2, 2, 1},
6209 /*expected_output=*/{1, 1, 0, 1}},
6210 // Dilated
6211 TestParams{/*input_dims=*/{1, 1, 2, 3},
6212 /*input=*/{0, 1, 2, 3, 3, 4},
6213 /*filter_dims=*/{1, 2, 1, 1},
6214 /*filter=*/{-1, 1},
6215 /*strides=*/{1, 1, 1, 1},
6216 /*padding=*/"VALID",
6217 /*data_format=*/"NCHW",
6218 /*dilations=*/{1, 1, 1, 2},
6219 /*expected_output_dims=*/{1, 1, 2, 1},
6220 /*expected_output=*/{2, 1}},
6221 // Strided
6222 TestParams{/*input_dims=*/{1, 1, 2, 4},
6223 /*input=*/{0, 1, 2, 2, 3, 4, 4, 7},
6224 /*filter_dims=*/{1, 2, 1, 1},
6225 /*filter=*/{-1, 1},
6226 /*strides=*/{1, 1, 1, 2},
6227 /*padding=*/"VALID",
6228 /*data_format=*/"NCHW",
6229 /*dilations=*/{1, 1, 1, 1},
6230 /*expected_output_dims=*/{1, 1, 2, 2},
6231 /*expected_output=*/{1, 0, 1, 3}},
6232 };
6233
6234 for (int i = 0; i < ok_params.size(); i++) {
6235 Reset();
6236 NodeDef node_def =
6237 get_conv2d_nodedef(ok_params[i].strides, ok_params[i].padding,
6238 ok_params[i].data_format, ok_params[i].dilations);
6239 std::vector<int> partial_input_shape;
6240 if (trt_mode_ == TrtTestMode::kDynamicShape) {
6241 // The channel dim cannot have unknown size, fix that.
6242 partial_input_shape.resize(ok_params[i].input_dims.size(), -1);
6243 int channel_id = (ok_params[i].data_format == "NCHW") ? 1 : 3;
6244 partial_input_shape[channel_id] = ok_params[i].input_dims[channel_id];
6245 }
6246
6247 AddTestTensor("input", ok_params[i].input_dims, tf_type_,
6248 ok_params[i].input, partial_input_shape);
6249 AddTestWeights<float>("weights", ok_params[i].filter_dims,
6250 ok_params[i].filter);
6251
6252 TestOpConverter("my_conv2d", node_def, ok_params[i].expected_output_dims,
6253 Status::OK(), Status::OK(),
6254 ElementsAreArray(ok_params[i].expected_output));
6255 }
6256 }
6257
TEST_P(OpConverter_FP32_Test,ConvertConv2DBackpropInput)6258 TEST_P(OpConverter_FP32_Test, ConvertConv2DBackpropInput) {
6259 // Get nodedef for Conv2D layer.
6260 auto get_conv2d_backprop_input_nodedef =
6261 [](DataType tf_type, std::vector<int> strides = {1, 1, 1, 1},
6262 string padding = "SAME", string data_format = "NCHW",
6263 std::vector<int> dilations = {1, 1, 1, 1}) -> NodeDef {
6264 Scope s = Scope::NewRootScope();
6265 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
6266 auto filter = ops::Placeholder(s.WithOpName("weights"), tf_type);
6267 auto input_sizes = ops::Placeholder(s.WithOpName("input_sizes"), DT_INT32);
6268 ops::Conv2DBackpropInput::Attrs attrs = ops::Conv2DBackpropInput::Attrs()
6269 .DataFormat(data_format)
6270 .Dilations(dilations);
6271 auto conv2d = ops::Conv2DBackpropInput(
6272 s.WithOpName("my_conv2d_backprop_input"), input_sizes, filter, input,
6273 strides, padding, attrs);
6274 return conv2d.operation.node()->def();
6275 };
6276
6277 struct TestParams {
6278 std::vector<int> input_dims;
6279 std::vector<float> input;
6280 std::vector<int> filter_dims;
6281 std::vector<float> filter;
6282 std::vector<int> strides;
6283 string padding;
6284 string data_format;
6285 std::vector<int> dilations;
6286 std::vector<int> expected_output_dims;
6287 std::vector<float> expected_output;
6288 Status conversion_status;
6289 // For dynamic shape mode, we must use the partial_input_dims for
6290 // creating the test tensor if any of the input_dims are -1.
6291 std::vector<int> partial_input_dims;
6292 };
6293
6294 // Ok.
6295 std::vector<TestParams> params = {
6296 // Transpose Strided
6297 TestParams{/*input_dims=*/{1, 1, 2, 2},
6298 /*input=*/{0, 1, 2, 3},
6299 /*filter_dims=*/{1, 2, 1, 1},
6300 /*filter=*/{-1, 1},
6301 /*strides=*/{1, 1, 1, 2},
6302 /*padding=*/"SAME",
6303 /*data_format=*/"NCHW",
6304 /*dilations=*/{1, 1, 1, 1},
6305 /*expected_output_dims=*/{1, 1, 2, 4},
6306 /*expected_output=*/{0, 0, -1, 1, -2, 2, -3, 3}},
6307 // Transpose Strided NHWC
6308 TestParams{/*input_dims=*/{1, 2, 2, 1},
6309 /*input=*/{0, 1, 2, 3},
6310 /*filter_dims=*/{1, 2, 1, 1},
6311 /*filter=*/{-1, 1},
6312 /*strides=*/{1, 1, 2, 1},
6313 /*padding=*/"SAME",
6314 /*data_format=*/"NHWC",
6315 /*dilations=*/{1, 1, 1, 1},
6316 /*expected_output_dims=*/{1, 2, 4, 1},
6317 /*expected_output=*/{0, 0, -1, 1, -2, 2, -3, 3}},
6318 // Transpose Strided NHWC with VALID padding
6319 TestParams{/*input_dims=*/{1, 3, 1, 1},
6320 /*input=*/{0, 1, 2},
6321 /*filter_dims=*/{2, 1, 1, 1},
6322 /*filter=*/{-1, 1},
6323 /*strides=*/{1, 2, 1, 1},
6324 /*padding=*/"VALID",
6325 /*data_format=*/"NHWC",
6326 /*dilations=*/{1, 1, 1, 1},
6327 /*expected_output_dims=*/{1, 7, 1, 1},
6328 /*expected_output=*/{0, 0, -1, 1, -2, 2, 0}},
6329 TestParams{/*input_dims=*/{1, 1, 2, 2},
6330 /*input=*/{0, 1, 2, 3},
6331 /*filter_dims=*/{1, 2, 1, 1},
6332 /*filter=*/{-1, 1},
6333 /*strides=*/{1, 1, 1, 2},
6334 /*padding=*/"EXPLICIT",
6335 /*data_format=*/"NCHW",
6336 /*dilations=*/{1, 1, 1, 1},
6337 /*expected_output_dims=*/{1, 1, 2, 4},
6338 /*expected_output=*/{0, 0, -1, 1, -2, 2, -3, 3},
6339 errors::Unimplemented("EXPLICIT padding type not "
6340 "implemented, only VALID and SAME are"
6341 " supported")},
6342 // Dilation + Conv2DBackpropInput, should fail.
6343 TestParams{/*input_dims=*/{1, 1, 2, 2},
6344 /*input=*/{0, 1, 2, 3},
6345 /*filter_dims=*/{1, 2, 1, 1},
6346 /*filter=*/{-1, 1},
6347 /*strides=*/{1, 1, 1, 1},
6348 /*padding=*/"SAME",
6349 /*data_format=*/"NCHW",
6350 /*dilations=*/{1, 1, 1, 2},
6351 {1, 1, 2, 2},
6352 {},
6353 errors::Unimplemented("Dilation with Conv2DBackpropInput "
6354 "(conv2d_transpose) is not supported")},
6355 };
6356 if (trt_mode_ == TrtTestMode::kDynamicShape) {
6357 params.push_back(
6358 TestParams{/*input_dims=*/{1, 1, 2, 2},
6359 /*input=*/{0, 1, 2, 3},
6360 /*filter_dims=*/{1, 2, 1, 1},
6361 /*filter=*/{-1, 1},
6362 /*strides=*/{1, 1, 1, 2},
6363 /*padding=*/"SAME",
6364 /*data_format=*/"NCHW",
6365 /*dilations=*/{1, 1, 1, 1},
6366 /*expected_output_dims=*/{1, 1, 2, 4},
6367 /*expected_output=*/{0, 0, -1, 1, -2, 2, -3, 3},
6368 errors::InvalidArgument("Channel dimension must be static"),
6369 /*partial input dims=*/{1, -1, 2, 2}});
6370 // Test dynamic batch dimension.
6371 params.push_back(
6372 TestParams{/*input_dims=*/{2, 1, 2, 2},
6373 /*input=*/
6374 // clang-format off
6375 {0, 1, 2, 3,
6376 3, 2, 1, 0},
6377 // clang-format on
6378 /*filter_dims=*/{1, 2, 1, 1},
6379 /*filter=*/{-1, 1},
6380 /*strides=*/{1, 1, 1, 2},
6381 /*padding=*/"SAME",
6382 /*data_format=*/"NCHW",
6383 /*dilations=*/{1, 1, 1, 1},
6384 /*expected_output_dims=*/{2, 1, 2, 4},
6385 /*expected_output=*/
6386 // clang-format off
6387 { 0, 0, -1, 1, -2, 2, -3, 3,
6388 -3, 3, -2, 2, -1, 1, 0, 0},
6389 // clang-format on
6390 /*conversion_status=*/Status::OK(),
6391 /*partial input dims=*/{-1, 1, 2, 2}});
6392
6393 // Test dynamic height and width.
6394 params.push_back(TestParams{
6395 /*input_dims=*/{1, 1, 2, 2},
6396 /*input=*/{0, 1, 2, 3},
6397 /*filter_dims=*/{1, 2, 1, 1},
6398 /*filter=*/{-1, 1},
6399 /*strides=*/{1, 1, 1, 2},
6400 /*padding=*/"SAME",
6401 /*data_format=*/"NCHW",
6402 /*dilations=*/{1, 1, 1, 1},
6403 /*expected_output_dims=*/{1, 1, 2, 4},
6404 /*expected_output=*/
6405 {0, 0, -1, 1, -2, 2, -3, 3},
6406 /*conversion_status=*/
6407 errors::Unimplemented(
6408 "Conv2dBackpropInput does not support input with unknown spatial "
6409 "shape"),
6410 /*partial input dims=*/{1, 1, -1, -1}});
6411 }
6412 for (auto p : params) {
6413 for (int input_sizes_length : {2, 4}) {
6414 Reset();
6415 NodeDef node_def = get_conv2d_backprop_input_nodedef(
6416 tf_type_, p.strides, p.padding, p.data_format, p.dilations);
6417
6418 switch (trt_mode_) {
6419 case TrtTestMode::kImplicitBatch: {
6420 AddTestTensor("input", p.input_dims, p.input);
6421 break;
6422 }
6423 case TrtTestMode::kExplicitBatch: {
6424 AddTestTensor("input", p.input_dims, p.input);
6425 break;
6426 }
6427 case TrtTestMode::kDynamicShape: {
6428 AddTestTensor("input", p.input_dims, tf_type_, p.input,
6429 p.partial_input_dims.size() > 0 ? p.partial_input_dims
6430 : p.input_dims);
6431 break;
6432 }
6433 default: {
6434 ASSERT_TRUE(false) << "unknown test mode";
6435 }
6436 }
6437
6438 AddTestWeights<float>("weights", p.filter_dims, p.filter, tf_type_);
6439
6440 if (input_sizes_length == 4) {
6441 AddTestWeights<int>("input_sizes", {4}, p.expected_output_dims);
6442 } else {
6443 std::vector<int> tf_input_sizes(2);
6444 // Remove the channel and batch dimensions.
6445 if (p.data_format == "NHWC") {
6446 std::copy(p.expected_output_dims.begin() + 1,
6447 p.expected_output_dims.end() - 1, tf_input_sizes.begin());
6448 } else {
6449 std::copy(p.expected_output_dims.begin() + 2,
6450 p.expected_output_dims.end(), tf_input_sizes.begin());
6451 }
6452 QCHECK_EQ(2, tf_input_sizes.size());
6453 AddTestWeights<int>("input_sizes", {2}, tf_input_sizes);
6454 }
6455
6456 TestOpConverter("my_conv2d_backprop_input", node_def,
6457 p.expected_output_dims, p.conversion_status, Status::OK(),
6458 ElementsAreArray(p.expected_output));
6459 }
6460 }
6461 }
6462
6463 // Get the NodeDef for Pack.
GetConv3DNodeDef(std::vector<int> strides={1, 1, 1, 1, 1},string padding="SAME",string data_format="NCDHW",std::vector<int> dilations={1, 1, 1, 1, 1},bool is_conv3d_backprop_input=false)6464 NodeDef GetConv3DNodeDef(std::vector<int> strides = {1, 1, 1, 1, 1},
6465 string padding = "SAME", string data_format = "NCDHW",
6466 std::vector<int> dilations = {1, 1, 1, 1, 1},
6467 bool is_conv3d_backprop_input = false) {
6468 Scope s = Scope::NewRootScope();
6469 auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
6470 auto filter = ops::Placeholder(s.WithOpName("weights"), DT_FLOAT);
6471
6472 if (is_conv3d_backprop_input) {
6473 auto input_sizes = ops::Placeholder(s.WithOpName("input_sizes"), DT_INT32);
6474 ops::Conv3DBackpropInputV2::Attrs attrs =
6475 ops::Conv3DBackpropInputV2::Attrs()
6476 .DataFormat(data_format)
6477 .Dilations(dilations);
6478 auto conv3d =
6479 ops::Conv3DBackpropInputV2(s.WithOpName("my_conv3d"), input_sizes,
6480 filter, input, strides, padding, attrs);
6481 return conv3d.operation.node()->def();
6482 } else {
6483 ops::Conv3D::Attrs attrs =
6484 ops::Conv3D::Attrs().DataFormat(data_format).Dilations(dilations);
6485 auto conv3d = ops::Conv3D(s.WithOpName("my_conv3d"), input, filter, strides,
6486 padding, attrs);
6487 return conv3d.operation.node()->def();
6488 }
6489 }
6490
6491 struct Conv3DTestParams {
6492 std::vector<int> input_dims;
6493 std::vector<float> input;
6494 std::vector<int> filter_dims;
6495 std::vector<float> filter;
6496 std::vector<int> strides;
6497 string padding;
6498 string data_format;
6499 std::vector<int> dilations;
6500 bool is_conv3d_backprop;
6501 std::vector<int> expected_output_dims;
6502 std::vector<float> expected_output;
6503 bool allow_dynamic_channel_dim;
6504 Status validation_status;
6505 };
6506
TestConv3D(ParameterizedOpConverterTestBase * test,Conv3DTestParams & p)6507 void TestConv3D(ParameterizedOpConverterTestBase* test, Conv3DTestParams& p) {
6508 test->Reset();
6509 NodeDef node_def = GetConv3DNodeDef(p.strides, p.padding, p.data_format,
6510 p.dilations, p.is_conv3d_backprop);
6511
6512 std::vector<int> partial_input_shape;
6513 if (!p.allow_dynamic_channel_dim &&
6514 test->get_trt_mode() == TrtTestMode::kDynamicShape) {
6515 // The channel dim cannot have unknown size, fix that.
6516 partial_input_shape.resize(p.input_dims.size(), -1);
6517 int channel_id = (p.data_format == "NCDHW") ? 1 : 4;
6518 partial_input_shape[channel_id] = p.input_dims[channel_id];
6519 }
6520
6521 test->AddTestTensor("input", p.input_dims, test->get_tf_type(), p.input,
6522 partial_input_shape);
6523 test->AddTestWeights<float>("weights", p.filter_dims, p.filter);
6524
6525 if (p.is_conv3d_backprop) {
6526 test->AddTestWeights<float>("input_sizes",
6527 {static_cast<int>(p.expected_output.size())},
6528 p.expected_output);
6529 }
6530
6531 test->TestOpConverter("my_conv3d", node_def, p.expected_output_dims,
6532 /*expected_conversion_status=*/p.validation_status,
6533 /*expected_runtime_status=*/Status::OK(),
6534 /*matcher=*/ElementsAreArray(p.expected_output),
6535 /*out_tf_types=*/{test->get_tf_type()});
6536 }
6537
TEST_P(OpConverter_FP32_FP16_Test,ConvertConv3D)6538 TEST_P(OpConverter_FP32_FP16_Test, ConvertConv3D) {
6539 {
6540 // Input is weights, should fail.
6541 Reset();
6542 NodeDef node_def = GetConv3DNodeDef();
6543
6544 AddTestWeights<float>("input", {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6});
6545 AddTestWeights<float>("weights", {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
6546 RunValidationAndConversion(
6547 node_def, error::UNIMPLEMENTED,
6548 "The input \"input\" for Conv3D must be a tensor");
6549 }
6550 {
6551 // Filter is tensor, should fail.
6552 Reset();
6553 NodeDef node_def = GetConv3DNodeDef();
6554 AddTestTensor("input", {1, 1, 2, 3}, tf_type_, CreateVectorIota<float>(6));
6555 AddTestTensor("weights", {1, 3, 3, 1}, tf_type_,
6556 CreateVectorIota<float>(9));
6557 RunValidationAndConversion(
6558 node_def, error::UNIMPLEMENTED,
6559 "The input \"filter\" for Conv3D must be a constant");
6560 }
6561 {
6562 // Filter is not 5D, should fail.
6563 Reset();
6564 NodeDef node_def = GetConv3DNodeDef();
6565 AddTestTensor("input", {1, 1, 2, 3}, tf_type_, CreateVectorIota<float>(6));
6566 AddTestWeights<float>("weights", {3, 3, 1, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9});
6567 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
6568 "Conv3D expects kernel of dimension 5");
6569 }
6570 {
6571 // Dilations is not 5D, should fail.
6572 Reset();
6573 NodeDef node_def =
6574 GetConv3DNodeDef({1, 1, 1, 1, 1}, "SAME", "NCDHW", {1, 1, 1, 1});
6575 AddTestTensor("input", {1, 1, 2, 3}, tf_type_, CreateVectorIota<float>(6));
6576 AddTestWeights<float>(
6577 "weights", {3, 3, 1, 1, 1},
6578 {1, 2, 3, 4, 5, 6, 7, 8, 9}); // Dimensions, then values
6579 RunValidationAndConversion(
6580 node_def, error::INVALID_ARGUMENT,
6581 "Convolution dilations field must specify 5 dimensions");
6582 }
6583 {
6584 // Dilation value is not 1 for channel, should fail.
6585 Reset();
6586 NodeDef node_def =
6587 GetConv3DNodeDef({1, 1, 1, 1, 1}, "SAME", "NCDHW", {1, 2, 1, 1, 1});
6588 AddTestTensor("input", {1, 1, 2, 3}, tf_type_, CreateVectorIota<float>(6));
6589 AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
6590 {1, 2, 3, 4, 5, 6, 7, 8, 9});
6591 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6592 "Dilation rate must be 1 for batch and channel "
6593 "dimensions");
6594 }
6595 {
6596 // Dilation value is not 1 for channel (NDHWC), should fail.
6597 Reset();
6598 NodeDef node_def =
6599 GetConv3DNodeDef({1, 1, 1, 1, 1}, "SAME", "NDHWC", {1, 1, 1, 1, 2});
6600 AddTestTensor("input", {1, 2, 3, 1}, tf_type_, CreateVectorIota<float>(6));
6601 AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
6602 {1, 2, 3, 4, 5, 6, 7, 8, 9});
6603 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6604 "Dilation rate must be 1 for batch and channel "
6605 "dimensions");
6606 }
6607 {
6608 // Dilation + Conv3DBackpropInputV2, should fail.
6609 Reset();
6610 NodeDef node_def = GetConv3DNodeDef({1, 1, 1, 1, 1}, "SAME", "NDHWC",
6611 {1, 1, 2, 1, 1}, true);
6612 AddTestTensor("input", {1, 2, 3, 1}, tf_type_, CreateVectorIota<float>(6));
6613 AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
6614 {1, 2, 3, 4, 5, 6, 7, 8, 9});
6615 AddTestWeights<int>("input_sizes", {4}, {1, 2, 3, 1});
6616 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6617 "Dilation with Conv3DBackpropInputV2 "
6618 "(conv3d_transpose) is not supported");
6619 }
6620 {
6621 // Asymmetric+ Conv3DBackpropInputV2, should fail.
6622 Reset();
6623 NodeDef node_def = GetConv3DNodeDef({1, 1, 1, 1, 1}, "SAME", "NDHWC",
6624 {1, 1, 1, 1, 1}, true);
6625 AddTestTensor("input", {1, 2, 2, 2}, tf_type_, CreateVectorIota<float>(8));
6626 AddTestWeights<float>("weights", {1, 1, 2, 1, 1}, {1, 1});
6627 AddTestWeights<int>("input_sizes", {8}, {1, 2, 3, 4, 5, 6, 7, 8});
6628 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
6629 "Asymmetric padding with Conv3DBackpropInputV2 "
6630 "(conv3d_transpose) is not supported");
6631 }
6632 {
6633 // Strides is not 5D, should fail.
6634 Reset();
6635 NodeDef node_def =
6636 GetConv3DNodeDef({1, 1, 1, 1, 1, 1}, "SAME", "NCDHW", {1, 1, 1, 1, 1});
6637 AddTestTensor("input", {1, 2, 2, 2}, tf_type_, CreateVectorIota<float>(8));
6638 AddTestWeights<float>("weights", {1, 1, 2, 1, 1}, {1, 1});
6639 RunValidationAndConversion(
6640 node_def, error::INVALID_ARGUMENT,
6641 "Convolution strides field must specify 5 dimensions");
6642 }
6643 {
6644 // Stride value is not 1 for channel, should fail.
6645 Reset();
6646 NodeDef node_def =
6647 GetConv3DNodeDef({1, 2, 1, 1, 1}, "SAME", "NCDHW", {1, 1, 1, 1, 1});
6648 AddTestTensor("input", {1, 1, 2, 3}, tf_type_, CreateVectorIota<float>(6));
6649 AddTestWeights<float>("weights", {3, 3, 1, 1, 1},
6650 {1, 2, 3, 4, 5, 6, 7, 8, 9});
6651 RunValidationAndConversion(
6652 node_def, error::UNIMPLEMENTED,
6653 "Stride must be 1 for batch and channel dimensions");
6654 }
6655
6656 // Start here
6657 std::vector<Conv3DTestParams> ok_params = {
6658 // Basic - just 1x1 conv - input = output
6659 {/*input_dims=*/{1, 1, 3, 3, 3}, // CDHW
6660 /*input=*/{1, 2, 15, 3, 6, -3, 22, 1, 88, 56, 36, 1, 1, 105,
6661 1, 16, -28, 1, 42, 9, 3, 1, 7, 1, 11, 61, 5},
6662 /*filter_dims=*/{1, 1, 1, 1, 1}, // DRSCK
6663 /*filter=*/{1},
6664 /*strides=*/{1, 1, 1, 1, 1},
6665 /*padding=*/"VALID",
6666 /*data_format=*/"NCDHW",
6667 /*dilations=*/{1, 1, 1, 1, 1},
6668 /*is_conv3d_backprop=*/false,
6669 /*expected_output_dims=*/{1, 1, 3, 3, 3},
6670 /*expected_output=*/{1, 2, 15, 3, 6, -3, 22, 1, 88,
6671 56, 36, 1, 1, 105, 1, 16, -28, 1,
6672 42, 9, 3, 1, 7, 1, 11, 61, 5},
6673 /*allow_dynamic_channel_dim=*/false,
6674 /*validation_status=*/Status::OK()},
6675 // Basic - 2x1 filter
6676 {/*input_dims=*/{1, 1, 3, 3, 3}, // CDHW
6677 /*input=*/{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6678 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6},
6679 /*filter_dims=*/{2, 1, 1, 1, 1}, // DRSCK
6680 /*filter=*/{1, 1},
6681 /*strides=*/{1, 1, 1, 1, 1},
6682 /*padding=*/"VALID",
6683 /*data_format=*/"NCDHW",
6684 /*dilations=*/{1, 1, 1, 1, 1},
6685 /*is_conv3d_backprop=*/false,
6686 /*expected_output_dims=*/{1, 1, 2, 3, 3},
6687 /*expected_output=*/
6688 {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7},
6689 /*allow_dynamic_channel_dim=*/false,
6690 /*validation_status=*/Status::OK()},
6691 // SAME padding (Asymmetric)
6692 {/*input_dims=*/{1, 1, 2, 3, 2}, // CDHW
6693 /*input=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
6694 /*filter_dims=*/{2, 1, 1, 1, 1}, // DRSCK
6695 /*filter=*/{-1, 1},
6696 /*strides=*/{1, 1, 1, 1, 1},
6697 /*padding=*/"SAME",
6698 /*data_format=*/"NCDHW",
6699 /*dilations=*/{1, 1, 1, 1, 1},
6700 /*is_conv3d_backprop=*/false,
6701 /*expected_output_dims=*/{1, 1, 2, 3, 2},
6702 // Diff in first 2 depths is const 6.
6703 /*expected_output=*/{6, 6, 6, 6, 6, 6, -6, -7, -8, -9, -10, -11},
6704 /*allow_dynamic_channel_dim=*/false,
6705 /*validation_status=*/Status::OK()},
6706 // SAME padding (Symmetric)
6707 {/*input_dims=*/{1, 1, 2, 3, 2}, // CDHW
6708 /*input=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
6709 /*filter_dims=*/{3, 1, 1, 1, 1}, // DRSCK
6710 /*filter=*/{-1, 0, 1},
6711 /*strides=*/{1, 1, 1, 1, 1},
6712 /*padding=*/"SAME",
6713 /*data_format=*/"NCDHW",
6714 /*dilations=*/{1, 1, 1, 1, 1},
6715 /*is_conv3d_backprop=*/false,
6716 /*expected_output_dims=*/{1, 1, 2, 3, 2},
6717 // Swaps front two depths, negates
6718 /*expected_output=*/{6, 7, 8, 9, 10, 11, 0, -1, -2, -3, -4, -5},
6719 /*allow_dynamic_channel_dim=*/false,
6720 /*validation_status=*/Status::OK()
6721
6722 },
6723 // NDHWC (multi-channel)
6724 {/*input_dims=*/{1, 2, 3, 2, 2}, // DHWC
6725 /*input=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
6726 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
6727 /*filter_dims=*/{2, 1, 1, 2, 1}, // DRSCK
6728 /*filter=*/{-1, 1, 1, -1},
6729 /*strides=*/{1, 1, 1, 1, 1},
6730 /*padding=*/"VALID",
6731 /*data_format=*/"NDHWC",
6732 /*dilations=*/{1, 1, 1, 1, 1},
6733 /*is_conv3d_backprop=*/false,
6734 /*expected_output_dims=*/{1, 1, 3, 2, 1},
6735 /*expected_output=*/{0, 0, 0, 0, 0, 0}, // Filters oppose each-other
6736 /*allow_dynamic_channel_dim=*/false,
6737 /*validation_status=*/Status::OK()},
6738 // Dilated
6739 {/*input_dims=*/{1, 1, 3, 3, 3}, // CDHW
6740 /*input=*/{1, 1, 1, 1, 1, 1, 1, 1, 1, -10, -10, -10, -10, -10,
6741 -10, -10, -10, -10, 7, 7, 7, 7, 7, 7, 7, 7, 7},
6742 /*filter_dims=*/{2, 1, 1, 1, 1}, // DRSCK
6743 /*filter=*/{1, 1},
6744 /*strides=*/{1, 1, 1, 1, 1},
6745 /*padding=*/"VALID",
6746 /*data_format=*/"NCDHW",
6747 /*dilations=*/{1, 1, 2, 1, 1},
6748 /*is_conv3d_backprop=*/false,
6749 /*expected_output_dims=*/{1, 1, 1, 3, 3},
6750 // Only front depth is valid, skips neg values
6751 /*expected_output=*/{8, 8, 8, 8, 8, 8, 8, 8, 8},
6752 /*allow_dynamic_channel_dim=*/false,
6753 /*validation_status=*/Status::OK()},
6754 // Strided
6755 {/*input_dims=*/{1, 1, 3, 3, 3},
6756 /*input=*/{1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0,
6757 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 7, 0, 8},
6758 /*filter_dims=*/{1, 1, 1, 1, 1},
6759 /*filter=*/{1},
6760 /*strides=*/{1, 1, 2, 2, 2},
6761 /*padding=*/"VALID",
6762 /*data_format=*/"NCDHW",
6763 /*dilations=*/{1, 1, 1, 1, 1},
6764 /*is_conv3d_backprop=*/false,
6765 /*expected_output_dims=*/{1, 1, 2, 2, 2},
6766 // Should only pick up the corners
6767 /*expected_output=*/{1, 2, 3, 4, 5, 6, 7, 8},
6768 /*allow_dynamic_channel_dim=*/false,
6769 /*validation_status=*/Status::OK()},
6770 // Transpose Strided
6771 {/*input_dims=*/{1, 1, 2, 2, 2}, // CDHW
6772 /*input=*/{1, 2, 3, 4, 5, 6, 7, 8},
6773 /*filter_dims=*/{1, 1, 1, 1, 1},
6774 /*filter=*/{1},
6775 /*strides=*/{1, 1, 2, 2, 2},
6776 /*padding=*/"VALID",
6777 /*data_format=*/"NCDHW",
6778 /*dilations=*/{1, 1, 1, 1, 1},
6779 /*is_conv3d_backprop=*/true,
6780 /*expected_output_dims=*/{1, 1, 3, 3, 3},
6781 /*expected_output=*/{1, 0, 2, 0, 0, 0, 3, 0, 4, // Cube expands and
6782 0, 0, 0, 0, 0, 0, 0, 0, 0, // fills center
6783 5, 0, 6, 0, 0, 0, 7, 0, 8}, // with zeroes
6784 /*allow_dynamic_channel_dim=*/false,
6785 /*validation_status=*/Status::OK()},
6786 };
6787
6788 if (trt_mode_ == TrtTestMode::kDynamicShape) {
6789 ok_params.reserve(ok_params.size() + 2);
6790 const std::vector<float> common_input = CreateVectorIota<float>(3 * 3 * 3);
6791 // NCDHW - Dynamic Channel - Should fail in kDynamicShape
6792 ok_params.push_back(Conv3DTestParams{
6793 /*input_dims=*/{1, 1, 3, 3, 3},
6794 /*input=*/common_input,
6795 /*filter_dims=*/{1, 1, 1, 1, 1},
6796 /*filter=*/{1},
6797 /*strides=*/{1, 1, 2, 2, 2},
6798 /*padding=*/"VALID",
6799 /*data_format=*/"NCDHW",
6800 /*dilations=*/{1, 1, 1, 1, 1},
6801 /*is_conv3d_backprop=*/false,
6802 /*expected_output_dims=*/{}, // ignore, will fail anyway
6803 /*expected_output=*/{}, // ignore, will fail anyway
6804 /*allow_dynamic_channel_dim=*/true,
6805 /*validation_status=*/
6806 Status{error::INVALID_ARGUMENT, "Channel dimension must be static"}});
6807 // NDHWC - Dynamic Channel - Should fail in kDynamicShape
6808 ok_params.push_back(Conv3DTestParams{
6809 /*input_dims=*/{1, 3, 3, 3, 1},
6810 /*input=*/common_input,
6811 /*filter_dims=*/{1, 1, 1, 1, 1},
6812 /*filter=*/{1},
6813 /*strides=*/{1, 2, 2, 2, 1},
6814 /*padding=*/"VALID",
6815 /*data_format=*/"NDHWC",
6816 /*dilations=*/{1, 1, 1, 1, 1},
6817 /*is_conv3d_backprop=*/false,
6818 /*expected_output_dims=*/{}, // ignore, will fail anyway
6819 /*expected_output=*/{}, // ignore, will fail anyway
6820 /*allow_dynamic_channel_dim=*/true,
6821 /*validation_status=*/
6822 Status{error::INVALID_ARGUMENT, "Channel dimension must be static"}});
6823 }
6824
6825 for (auto p : ok_params) {
6826 TestConv3D(this, p);
6827 }
6828 }
6829
6830 template <typename T>
CreatePoolOp(DataType tf_type,std::vector<int> ksize,std::vector<int> strides,string padding,string data_format)6831 NodeDef CreatePoolOp(DataType tf_type, std::vector<int> ksize,
6832 std::vector<int> strides, string padding,
6833 string data_format) {
6834 Scope s = Scope::NewRootScope();
6835 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
6836 typename T::Attrs attrs;
6837 attrs.data_format_ = data_format;
6838 return T(s.WithOpName("my_pool"), input, ksize, strides, padding, attrs)
6839 .operation.node()
6840 ->def();
6841 }
TEST_P(OpConverter_FP32_Test,ConvertPool)6842 TEST_P(OpConverter_FP32_Test, ConvertPool) {
6843 // Get nodedef for MaxPool and AvgPool layers (2D or 3D).
6844 auto get_pool_nodedef =
6845 [](DataType tf_type, int nDim, std::vector<int> ksize = {},
6846 std::vector<int> strides = {}, string padding = "SAME",
6847 string data_format = "", const bool is_max_pooling = true) -> NodeDef {
6848 if (ksize.empty()) {
6849 ksize = nDim == 2 ? std::vector<int>{1, 1, 1, 1}
6850 : std::vector<int>{1, 1, 1, 1, 1};
6851 }
6852 if (strides.empty()) {
6853 strides = nDim == 2 ? std::vector<int>{1, 1, 1, 1}
6854 : std::vector<int>{1, 1, 1, 1, 1};
6855 }
6856 if (data_format == "") {
6857 data_format = nDim == 2 ? "NCHW" : "NCDHW";
6858 }
6859 if (is_max_pooling) {
6860 if (nDim == 3) {
6861 return CreatePoolOp<ops::MaxPool3D>(tf_type, ksize, strides, padding,
6862 data_format);
6863 } else {
6864 return CreatePoolOp<ops::MaxPool>(tf_type, ksize, strides, padding,
6865 data_format);
6866 }
6867 } else {
6868 if (nDim == 3) {
6869 return CreatePoolOp<ops::AvgPool3D>(tf_type, ksize, strides, padding,
6870 data_format);
6871 } else {
6872 return CreatePoolOp<ops::AvgPool>(tf_type, ksize, strides, padding,
6873 data_format);
6874 }
6875 }
6876 };
6877
6878 std::vector<int> test_nDims{2, 3};
6879
6880 for (int nDim : test_nDims) {
6881 // Input is weights, should fail.
6882 Reset();
6883 NodeDef node_def = get_pool_nodedef(tf_type_, nDim);
6884
6885 AddTestWeights<float>("input", {1, 1, 1, 2, 3}, {1, 2, 3, 4, 5, 6});
6886 RunValidationAndConversion(
6887 node_def, error::UNIMPLEMENTED,
6888 StrCat("The input \"input\" for ", node_def.op(), " must be a tensor"));
6889 }
6890
6891 struct TestParams {
6892 std::vector<int> input_dims;
6893 std::vector<float> input;
6894 std::vector<int> ksize;
6895 std::vector<int> strides;
6896 string padding;
6897 string data_format;
6898 std::vector<int> expected_output_dims;
6899 // The expected outputs for the following operations: MaxPool2D, AvgPool2D,
6900 // MaxPool3D, AvgPool3D
6901 std::vector<std::vector<float>> expected_outputs;
6902 };
6903
6904 // We use common_input as the input to test both 2D and 3D pooling operations,
6905 // to simplify TestParams. For 2D operations, only the first 1/3 of the values
6906 // are used.
6907 const std::vector<float> common_input{-4, 2, 15, 3, 6, -3, 22, 1, 88,
6908 56, 36, 1, 1, 105, 1, 16, -28, 1,
6909 42, 9, 3, 1, 7, 1, 11, 61, 5};
6910 // The output of 2D ops for the case where the op is equivalent to the
6911 // identity op.
6912 const std::vector<float> common_2d_output{-4, 2, 15, 3, 6, -3, 22, 1, 88};
6913 std::vector<TestParams> ok_params = {
6914 // Basic - just 1x1 max pooling - input = output
6915 TestParams{
6916 /*input_dims=*/{1, 1, 3, 3, 3},
6917 /*input=*/common_input,
6918 /*ksize=*/{1, 1, 1, 1, 1},
6919 /*strides=*/{1, 1, 1, 1, 1},
6920 /*padding=*/"VALID",
6921 /*data_format=*/"NCDHW",
6922 /*expected_output_dims=*/{1, 1, 3, 3, 3},
6923 /*expected_outputs=*/
6924 {common_2d_output, common_2d_output, common_input, common_input}},
6925 // Basic - just 1x1 max pooling - input = output, SAME padding
6926 TestParams{
6927 /*input_dims=*/{1, 1, 3, 3, 3},
6928 /*input=*/common_input,
6929 /*ksize=*/{1, 1, 1, 1, 1},
6930 /*strides=*/{1, 1, 1, 1, 1},
6931 /*padding=*/"SAME",
6932 /*data_format=*/"NCDHW",
6933 /*expected_output_dims=*/{1, 1, 3, 3, 3},
6934 /*expected_outputs=*/
6935 {common_2d_output, common_2d_output, common_input, common_input}},
6936 // 3x3 pooling NCDHW
6937 TestParams{/*input_dims=*/{1, 1, 3, 3, 3},
6938 /*input=*/common_input,
6939 /*ksize=*/{1, 1, 3, 3, 3},
6940 /*strides=*/{1, 1, 1, 1, 1},
6941 /*padding=*/"VALID",
6942 /*data_format=*/"NCDHW",
6943 /*expected_output_dims=*/{1, 1, 1, 1, 1},
6944 /*expected_outputs=*/{{88}, {14.444445}, {105}, {17}}},
6945 // 3x3 pooling, NDHWC
6946 TestParams{/*input_dims=*/{1, 3, 3, 3, 1},
6947 /*input=*/common_input,
6948 /*ksize=*/{1, 3, 3, 3, 1},
6949 /*strides=*/{1, 1, 1, 1, 1},
6950 /*padding=*/"VALID",
6951 /*data_format=*/"NDHWC",
6952 /*expected_output_dims=*/{1, 1, 1, 1, 1},
6953 /*expected_outputs=*/{{88}, {14.444445}, {105}, {17}}},
6954 // Strided
6955 TestParams{/*input_dims=*/{1, 1, 3, 3, 3},
6956 /*input=*/{1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0,
6957 0, 0, 0, 0, 5, 0, 6, 0, 0, 0, 7, 0, 8},
6958 /*ksize=*/{1, 1, 1, 1, 1},
6959 /*strides=*/{1, 1, 2, 2, 2},
6960 /*padding=*/"VALID",
6961 /*data_format=*/"NCDHW",
6962 /*expected_output_dims=*/{1, 1, 2, 2, 2},
6963 /*expected_outputs=*/
6964 {{1, 2, 3, 4}, // Should only pick up the corners
6965 {1, 2, 3, 4},
6966 {1, 2, 3, 4, 5, 6, 7, 8},
6967 {1, 2, 3, 4, 5, 6, 7, 8}}},
6968 };
6969
6970 for (auto p : ok_params) {
6971 int test_counter = 0;
6972 for (int nDim : test_nDims) {
6973 auto input = p.input;
6974 auto input_dims = p.input_dims;
6975 auto ksize = p.ksize;
6976 auto strides = p.strides;
6977 auto expected_output_dims = p.expected_output_dims;
6978 std::string data_format = p.data_format;
6979 if (nDim == 2) {
6980 input.resize(9);
6981 data_format = p.data_format == "NDHWC" ? "NHWC" : "NCHW";
6982 // Remove one of the spatial dimensions
6983 input_dims.erase(input_dims.begin() + 2);
6984 ksize.erase(ksize.begin() + 2);
6985 strides.erase(strides.begin() + 2);
6986 expected_output_dims.erase(expected_output_dims.begin() + 2);
6987 }
6988 for (bool is_max_pooling : {true, false}) {
6989 Reset();
6990 NodeDef node_def =
6991 get_pool_nodedef(tf_type_, nDim, ksize, strides, p.padding,
6992 data_format, is_max_pooling);
6993 AddTestTensor("input", input_dims, input);
6994 TestOpConverter("my_pool", node_def, expected_output_dims, Status::OK(),
6995 Status::OK(),
6996 ElementsAreArray(p.expected_outputs.at(test_counter)));
6997 test_counter++;
6998 }
6999 }
7000 }
7001 }
7002
TEST_P(OpConverter_FP32_FP16_Test,ConvertTopK)7003 TEST_P(OpConverter_FP32_FP16_Test, ConvertTopK) {
7004 // Get the NodeDef for TopKV2.
7005 Scope s = Scope::NewRootScope();
7006 auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
7007 auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
7008 auto topk = ops::TopK(s.WithOpName("my_topk"), input, weights);
7009 const NodeDef& node_def = topk.operation.node()->def();
7010 {
7011 // K is a tensor, should fail.
7012 Reset();
7013 AddTestTensor("input", {1, 1, 2, 3});
7014 AddTestTensor("weights", {1}, DT_INT32, {});
7015 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
7016 "The input \"k\" for TopKV2 must be a constant");
7017 }
7018 {
7019 // Ok.
7020 Reset();
7021 AddTestTensor("input", {1, 1, 2, 5}, {-9, 3, 5, 1, 6, -5, 7, 1, 0, -1});
7022 AddTestWeights<int32>("weights", {1}, {2});
7023 std::vector<std::vector<int>> expected_output_dims{{1, 1, 2, 2},
7024 {1, 1, 2, 2}};
7025 TestOpConverterMultiOut("my_topk", node_def, expected_output_dims,
7026 Status::OK(), Status::OK(),
7027 {ElementsAre(6, 5, 7, 1), ElementsAre(4, 2, 1, 2)},
7028 {tf_type_, DT_INT32});
7029 }
7030 }
7031
7032 struct DataFormatVecPermuteTestParams {
7033 string dst_format;
7034 string src_format;
7035 std::vector<int> x_shape;
7036 std::vector<int> x;
7037 bool x_is_tensor;
7038 std::vector<int> expected_output;
7039 Status conversion_status;
7040 };
7041
GetDataFormatVecPermuteNodeDef(string dst_format,string src_format,std::vector<int> & x_shape)7042 NodeDef GetDataFormatVecPermuteNodeDef(string dst_format, string src_format,
7043 std::vector<int>& x_shape) {
7044 Scope s = Scope::NewRootScope();
7045 PartialTensorShape tensor_shape;
7046 auto x = ops::Placeholder(s.WithOpName("x"), DT_INT32);
7047 const auto attrs = ops::DataFormatVecPermute::Attrs()
7048 .DstFormat(dst_format)
7049 .SrcFormat(src_format);
7050 auto dfvp = ops::DataFormatVecPermute(s.WithOpName("my_dfvp"), x, attrs);
7051 return dfvp.operation.node()->def();
7052 }
7053
TEST_P(OpConverter_INT32_Test,ConvertDataFormatVecPermute)7054 TEST_P(OpConverter_INT32_Test, ConvertDataFormatVecPermute) {
7055 Status implicit_error = Status{
7056 error::UNIMPLEMENTED, "Implicit batch mode not supported, at my_dfvp"};
7057
7058 std::vector<DataFormatVecPermuteTestParams> test_params = {
7059 // 1D case with tensor.
7060 DataFormatVecPermuteTestParams{
7061 /*dst_format=*/"NCHW",
7062 /*src_format=*/"NHWC",
7063 /*x_shape=*/{4},
7064 /*x=*/{1, 2, 3, 4},
7065 /*x_is_tensor=*/true,
7066 /*expected_output=*/{1, 4, 2, 3},
7067 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7068 ? implicit_error
7069 : Status::OK()},
7070 // 1D case with weights.
7071 DataFormatVecPermuteTestParams{
7072 /*dst_format=*/"NCHW",
7073 /*src_format=*/"NHWC",
7074 /*x_shape=*/{4},
7075 /*x=*/{1, 2, 3, 4},
7076 /*x_is_tensor=*/false,
7077 /*expected_output=*/{1, 4, 2, 3},
7078 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7079 ? implicit_error
7080 : Status::OK()},
7081 // 2D case with tensor.
7082 DataFormatVecPermuteTestParams{
7083 /*dst_format=*/"NCHW",
7084 /*src_format=*/"NHWC",
7085 /*x_shape=*/{4, 2},
7086 /*x=*/{1, 2, 3, 4, 5, 6, 7, 8},
7087 /*x_is_tensor=*/true,
7088 /*expected_output=*/{1, 2, 7, 8, 3, 4, 5, 6},
7089 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7090 ? implicit_error
7091 : Status::OK()},
7092 // 2D case with weights.
7093 DataFormatVecPermuteTestParams{
7094 /*dst_format=*/"NCHW",
7095 /*src_format=*/"NHWC",
7096 /*x_shape=*/{4, 2},
7097 /*x=*/{1, 2, 3, 4, 5, 6, 7, 8},
7098 /*x_is_tensor=*/false,
7099 /*expected_output=*/{1, 2, 7, 8, 3, 4, 5, 6},
7100 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7101 ? implicit_error
7102 : Status::OK()},
7103 // Format of size 5.
7104 DataFormatVecPermuteTestParams{
7105 /*dst_format=*/"NCDHW",
7106 /*src_format=*/"NDHWC",
7107 /*x_shape=*/{5, 2},
7108 /*x=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
7109 /*x_is_tensor=*/true,
7110 /*expected_output=*/{1, 2, 9, 10, 3, 4, 5, 6, 7, 8},
7111 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7112 ? implicit_error
7113 : Status::OK()},
7114 // Input of size 2: treat the elements as spatial dimensions.
7115 DataFormatVecPermuteTestParams{
7116 /*dst_format=*/"NCWH",
7117 /*src_format=*/"NHWC",
7118 /*x_shape=*/{2, 2},
7119 /*x=*/{1, 2, 3, 4},
7120 /*x_is_tensor=*/true,
7121 /*expected_output=*/{3, 4, 1, 2},
7122 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7123 ? implicit_error
7124 : Status::OK()},
7125 // Input of size 3: treat the elements as spatial dimensions.
7126 DataFormatVecPermuteTestParams{
7127 /*dst_format=*/"NCHWD",
7128 /*src_format=*/"NDHWC",
7129 /*x_shape=*/{3},
7130 /*x=*/{1, 2, 3},
7131 /*x_is_tensor=*/true,
7132 /*expected_output=*/{2, 3, 1},
7133 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7134 ? implicit_error
7135 : Status::OK()},
7136 // Invalid rank, should fail.
7137 DataFormatVecPermuteTestParams{
7138 /*dst_format=*/"NCHW",
7139 /*src_format=*/"NHWC",
7140 /*x_shape=*/{2, 2, 2},
7141 /*x=*/{1, 2, 3, 4, 5, 6, 7, 8},
7142 /*x_is_tensor=*/true,
7143 /*expected_output=*/{},
7144 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7145 ? implicit_error
7146 : Status{error::INVALID_ARGUMENT,
7147 "Input must be a vector or matrix, but got rank 3, at "
7148 "my_dfvp"}},
7149 // Invalid size for 1D input, should fail.
7150 DataFormatVecPermuteTestParams{
7151 /*dst_format=*/"NCHW",
7152 /*src_format=*/"NHWC",
7153 /*x_shape=*/{3},
7154 /*x=*/{1, 2, 3},
7155 /*x_is_tensor=*/true,
7156 /*expected_output=*/{},
7157 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7158 ? implicit_error
7159 : Status{error::INVALID_ARGUMENT,
7160 "1D input must be of size 2 or 4, but got size 3, at "
7161 "my_dfvp"}},
7162 // Invalid first dim for 2D input, should fail.
7163 DataFormatVecPermuteTestParams{
7164 /*dst_format=*/"NCDHW",
7165 /*src_format=*/"NDHWC",
7166 /*x_shape=*/{4, 2},
7167 /*x=*/{1, 2, 3, 4, 5, 6, 7, 8},
7168 /*x_is_tensor=*/true,
7169 /*expected_output=*/{},
7170 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7171 ? implicit_error
7172 : Status{error::INVALID_ARGUMENT,
7173 "First dimension of 2D input must be of size 3 or 5, "
7174 "but got shape (4, 2), at my_dfvp"}},
7175 // Invalid second dim for 2D input, should fail.
7176 DataFormatVecPermuteTestParams{
7177 /*dst_format=*/"NCHW",
7178 /*src_format=*/"NHWC",
7179 /*x_shape=*/{4, 3},
7180 /*x=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
7181 /*x_is_tensor=*/true,
7182 /*expected_output=*/{},
7183 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7184 ? implicit_error
7185 : Status{error::INVALID_ARGUMENT,
7186 "Second dimension of 2D input must be of size 2, but "
7187 "got shape (4, 3), at my_dfvp"}},
7188 };
7189
7190 for (auto p : test_params) {
7191 Reset();
7192 const NodeDef node_def =
7193 GetDataFormatVecPermuteNodeDef(p.dst_format, p.src_format, p.x_shape);
7194
7195 if (p.x_is_tensor) {
7196 AddTestTensor("x", p.x_shape, DT_INT32, p.x, p.x_shape);
7197 } else {
7198 AddTestWeights("x", p.x_shape, p.x, DT_INT32);
7199 }
7200
7201 TestOpConverter("my_dfvp", node_def, p.x_shape, p.conversion_status,
7202 Status::OK(), ElementsAreArray(p.expected_output));
7203 }
7204 }
7205
TEST_P(OpConverter_FP32_FP16_INT32_Test,ConvertGather)7206 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertGather) {
7207 // Get the NodeDef for GatherV2.
7208 Scope s = Scope::NewRootScope();
7209 auto params = ops::Placeholder(s.WithOpName("params"), tf_type_);
7210 auto indices = ops::Placeholder(s.WithOpName("indices"), DT_INT32);
7211 auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
7212 auto gather = ops::GatherV2(s.WithOpName("my_gather"), params, indices, axis);
7213 const NodeDef& node_def = gather.operation.node()->def();
7214 {
7215 // Axis is a tensor, should fail.
7216 Reset();
7217 AddTestTensor("params", {1, 1, 2, 3}, tf_type_, {});
7218 AddTestTensor("indices", {1, 2}, DT_INT32, {});
7219 AddTestTensor("axis", {1}, DT_INT32, {});
7220 RunValidationAndConversion(
7221 node_def, error::UNIMPLEMENTED,
7222 "The input \"axis\" for GatherV2 must be a constant");
7223 }
7224 {
7225 // Axis is out of bounds, should fail.
7226 Reset();
7227 AddTestTensor("params", {1, 1, 2, 3});
7228 AddTestTensor("indices", {1, 2}, DT_INT32, {});
7229 AddTestWeights<int32>("axis", {1}, {4});
7230 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
7231 "Axis value of 4 is out of bounds, must be in "
7232 "range [-4, 4)");
7233 }
7234
7235 struct TestParams {
7236 // TF shape of the input 'params' (including batch dimension).
7237 std::vector<int> params_shape;
7238 // TF shape of the input 'indices' (including batch dimension).
7239 std::vector<int> indices_shape;
7240 std::vector<int> indices;
7241 int axis;
7242 // Expected TF shape of the output (including batch dimension).
7243 std::vector<int> expected_output_shape;
7244 std::vector<int> expected_output;
7245 bool params_is_tensor;
7246 bool indices_is_tensor;
7247 Status conversion_status;
7248 Status runtime_status;
7249 Status add_index_status;
7250 };
7251
7252 // Input is the same {1, 2, 3, 4, 5, 6} for all cases.
7253 const std::vector<int> params_input = {1, 2, 3, 4, 5, 6};
7254
7255 std::vector<TestParams> test_params = {
7256 // Axis is batch dimension, should fail in implicit batch mode.
7257 TestParams{/*params_shape=*/{2, 1, 1, 3},
7258 /*indices_shape=*/{2},
7259 /*indices=*/{1, 0},
7260 /*axis=*/0,
7261 /*expected_output_shape=*/{2, 1, 1, 3},
7262 /*expected_output=*/{4, 5, 6, 1, 2, 3},
7263 /*params_is_tensor=*/true,
7264 /*indices_is_tensor=*/true,
7265 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7266 ? Status{error::UNIMPLEMENTED,
7267 "TensorRT does not allow "
7268 "manipulation of the batch dimension"}
7269 : Status::OK()},
7270 // Batch size of indices is not 1 when params and indices are tensors.
7271 TestParams{/*params_shape=*/{2, 1, 3},
7272 /*indices_shape=*/{2, 1},
7273 /*indices=*/{2, 0},
7274 /*axis=*/2,
7275 /*expected_output_shape=*/{2, 1, 2, 1},
7276 /*expected_output=*/{3, 1, 6, 4},
7277 /*params_is_tensor=*/true,
7278 /*indices_is_tensor=*/true,
7279 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7280 ? Status{error::UNIMPLEMENTED,
7281 "Params and indices must have a"
7282 " batch size of 1 when params and indices are "
7283 "both tensors or both"
7284 " constants."}
7285 : Status::OK()},
7286 // Batch size of indices is not 1 when params is tensor and indices are
7287 // constant.
7288 TestParams{/*params_shape=*/{2, 1, 3},
7289 /*indices_shape=*/{2, 1},
7290 /*indices=*/{2, 0},
7291 /*axis=*/2,
7292 /*expected_output_shape=*/{2, 1, 2, 1},
7293 /*expected_output=*/{3, 1, 6, 4},
7294 /*params_is_tensor=*/true,
7295 /*indices_is_tensor=*/false,
7296 /*conversion_status=*/Status::OK()},
7297 // Axis is not zero when params is a weight, should fail in implicit batch
7298 // mode.
7299 TestParams{/*params_shape=*/{2, 1, 3},
7300 /*indices_shape=*/{2},
7301 /*indices=*/{1, 2},
7302 /*axis=*/2,
7303 /*expected_output_shape=*/{2, 1, 2},
7304 /*expected_output=*/{2, 3, 5, 6},
7305 /*params_is_tensor=*/false,
7306 /*indices_is_tensor=*/true,
7307 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7308 ? Status{error::UNIMPLEMENTED,
7309 "The input axis must be zero when "
7310 "params is a weight."}
7311 : Status::OK()},
7312 // Params with only batch dimension.
7313 TestParams{
7314 /*params_shape=*/{6},
7315 /*indices_shape=*/{2},
7316 /*indices=*/{1, 3},
7317 /*axis=*/0,
7318 /*expected_output_shape=*/{2},
7319 /*expected_output=*/{2, 4},
7320 /*params_is_tensor=*/true,
7321 /*indices_is_tensor=*/true,
7322 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7323 ? Status{error::UNIMPLEMENTED,
7324 "TensorRT does not allow "
7325 "manipulation of the batch dimension"}
7326 : Status::OK(),
7327 /*runtime_status=*/Status::OK(),
7328 /*add_index_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7329 ? Status{error::INVALID_ARGUMENT,
7330 "Batch size doesn't match for "
7331 "tensor indices: Provided batch size does not match "
7332 "converter batch size: 2 vs 6"}
7333 : Status::OK()},
7334 // Vector indices, and output rank is rank(params).
7335 TestParams{
7336 /*params_shape=*/{1, 1, 2, 3},
7337 /*indices_shape=*/{1},
7338 /*indices=*/{0},
7339 /*axis=*/3,
7340 /*expected_output_shape=*/{1, 1, 2, 1},
7341 /*expected_output=*/{1, 4},
7342 /*params_is_tensor=*/true,
7343 /*indices_is_tensor=*/true,
7344 },
7345 TestParams{
7346 /*params_shape=*/{1, 1, 2, 3},
7347 /*indices_shape=*/{1},
7348 /*indices=*/{1},
7349 /*axis=*/2,
7350 /*expected_output_shape=*/{1, 1, 1, 3},
7351 /*expected_output=*/{4, 5, 6},
7352 /*params_is_tensor=*/true,
7353 /*indices_is_tensor=*/true,
7354 },
7355 // Indices with rank>1, and output rank is rank(params) + rank(indices) -
7356 // 1
7357 TestParams{
7358 /*params_shape=*/{1, 1, 2, 3},
7359 /*indices_shape=*/{1, 1},
7360 /*indices=*/{0},
7361 /*axis=*/3,
7362 /*expected_output_shape=*/{1, 1, 2, 1, 1},
7363 /*expected_output=*/{1, 4},
7364 /*params_is_tensor=*/true,
7365 /*indices_is_tensor=*/true,
7366 },
7367 TestParams{
7368 /*params_shape=*/{1, 1, 2, 3},
7369 /*indices_shape=*/{1, 1},
7370 /*indices=*/{1},
7371 /*axis=*/3,
7372 /*expected_output_shape=*/{1, 1, 2, 1, 1},
7373 /*expected_output=*/{2, 5},
7374 /*params_is_tensor=*/true,
7375 /*indices_is_tensor=*/true,
7376 },
7377 TestParams{
7378 /*params_shape=*/{1, 1, 2, 3},
7379 /*indices_shape=*/{1, 1},
7380 /*indices=*/{2},
7381 /*axis=*/-1,
7382 /*expected_output_shape=*/{1, 1, 2, 1, 1},
7383 /*expected_output=*/{3, 6},
7384 /*params_is_tensor=*/true,
7385 /*indices_is_tensor=*/true,
7386 },
7387 TestParams{
7388 /*params_shape=*/{1, 1, 2, 3},
7389 /*indices_shape=*/{1, 3},
7390 /*indices=*/{2, 0, 1},
7391 /*axis=*/3,
7392 /*expected_output_shape=*/{1, 1, 2, 1, 3},
7393 /*expected_output=*/{3, 1, 2, 6, 4, 5},
7394 /*params_is_tensor=*/true,
7395 /*indices_is_tensor=*/true,
7396 },
7397 TestParams{
7398 /*params_shape=*/{1, 3, 2},
7399 /*indices_shape=*/{1, 2, 2},
7400 /*indices=*/{0, 0, 1, 0},
7401 /*axis=*/2,
7402 /*expected_output_shape=*/{1, 3, 1, 2, 2},
7403 /*expected_output=*/{1, 1, 2, 1, 3, 3, 4, 3, 5, 5, 6, 5},
7404 /*params_is_tensor=*/true,
7405 /*indices_is_tensor=*/true,
7406 },
7407 TestParams{
7408 /*params_shape=*/{1, 2, 3},
7409 /*indices_shape=*/{1},
7410 /*indices=*/{0},
7411 /*axis=*/0,
7412 /*expected_output_shape=*/{1, 2, 3},
7413 /*expected_output=*/{1, 2, 3, 4, 5, 6},
7414 /*params_is_tensor=*/false,
7415 /*indices_is_tensor=*/true,
7416 },
7417 TestParams{
7418 /*params_shape=*/{3, 2},
7419 /*indices_shape=*/{1, 2},
7420 /*indices=*/{0, 1},
7421 /*axis=*/0,
7422 /*expected_output_shape=*/{1, 2, 2},
7423 /*expected_output=*/{1, 2, 3, 4},
7424 /*params_is_tensor=*/false,
7425 /*indices_is_tensor=*/true,
7426 },
7427 TestParams{
7428 /*params_shape=*/{2, 3},
7429 /*indices_shape=*/{1, 1, 2},
7430 /*indices=*/{0, 1},
7431 /*axis=*/0,
7432 /*expected_output_shape=*/{1, 1, 2, 3},
7433 /*expected_output=*/{1, 2, 3, 4, 5, 6},
7434 /*params_is_tensor=*/false,
7435 /*indices_is_tensor=*/true,
7436 },
7437 TestParams{
7438 /*params_shape=*/{3, 2},
7439 /*indices_shape=*/{2, 2},
7440 /*indices=*/{0, 2, 1, 0},
7441 /*axis=*/0,
7442 /*expected_output_shape=*/{2, 2, 2},
7443 /*expected_output=*/{1, 2, 5, 6, 3, 4, 1, 2},
7444 /*params_is_tensor=*/false,
7445 /*indices_is_tensor=*/true,
7446 },
7447 // Test cases in which indices constant
7448 TestParams{
7449 /*params_shape=*/{1, 1, 2, 3},
7450 /*indices_shape=*/{1, 1},
7451 /*indices=*/{0},
7452 /*axis=*/3,
7453 /*expected_output_shape=*/{1, 1, 2, 1, 1},
7454 /*expected_output=*/{1, 4},
7455 /*params_is_tensor=*/true,
7456 /*indices_is_tensor=*/false,
7457 },
7458 // Test cases in which both input and indices constant
7459 TestParams{/*params_shape=*/{1, 2, 3},
7460 /*indices_shape=*/{1},
7461 /*indices=*/{0},
7462 /*axis=*/0,
7463 /*expected_output_shape=*/{1, 2, 3},
7464 /*expected_output=*/{1, 2, 3, 4, 5, 6},
7465 /*params_is_tensor=*/false,
7466 /*indices_is_tensor=*/false,
7467 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7468 ? Status{error::UNIMPLEMENTED,
7469 "Params and indices must have a"
7470 " batch size of 1 when params and indices are "
7471 "both tensors or both"
7472 " constants."}
7473 : Status::OK()},
7474 TestParams{/*params_shape=*/{3, 2},
7475 /*indices_shape=*/{2, 2},
7476 /*indices=*/{0, 2, 1, 0},
7477 /*axis=*/0,
7478 /*expected_output_shape=*/{2, 2, 2},
7479 /*expected_output=*/{1, 2, 5, 6, 3, 4, 1, 2},
7480 /*params_is_tensor=*/false,
7481 /*indices_is_tensor=*/false,
7482 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7483 ? Status{error::UNIMPLEMENTED,
7484 "Params and indices must have a"
7485 " batch size of 1 when params and indices are "
7486 "both tensors or both"
7487 " constants."}
7488 : Status::OK()},
7489 };
7490
7491 for (auto p : test_params) {
7492 Reset();
7493
7494 if (p.params_is_tensor) {
7495 AddTestTensor("params", p.params_shape, params_input);
7496 } else {
7497 AddTestWeights("params", p.params_shape, params_input, tf_type_);
7498 }
7499
7500 if (p.indices_is_tensor) {
7501 AddTestTensor("indices", p.indices_shape, DT_INT32, p.indices, {},
7502 p.add_index_status);
7503 } else {
7504 std::vector<int> indices_shape(p.indices_shape);
7505 AddTestWeights("indices", indices_shape, p.indices, DT_INT32);
7506 }
7507
7508 AddTestWeights<int32>("axis", {1}, {p.axis});
7509 TestOpConverter("my_gather", node_def, p.expected_output_shape,
7510 p.conversion_status, p.runtime_status,
7511 ElementsAreArray(p.expected_output));
7512 }
7513 }
7514
7515 template <typename OpType>
CreateReduceOp(DataType tf_type,bool keep_dims)7516 NodeDef CreateReduceOp(DataType tf_type, bool keep_dims) {
7517 Scope s = Scope::NewRootScope();
7518 auto input = ops::Placeholder(s.WithOpName("input"), tf_type);
7519 auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
7520 typename OpType::Attrs op_attrs;
7521 op_attrs.keep_dims_ = keep_dims;
7522 auto op = OpType(s.WithOpName("my_reduce"), input, axis, op_attrs);
7523 return op.operation.node()->def();
7524 }
7525
7526 // Applies reduction op on sub-sequences of input
7527 // output[i] = reduce(input[m * i : m * (i +1)])
CalcReduce(string op_name,std::vector<float> input,int m,float (* op)(float,float),float init)7528 std::vector<float> CalcReduce(string op_name, std::vector<float> input, int m,
7529 float (*op)(float, float), float init) {
7530 std::vector<float> output(input.size() / m);
7531 for (int i = 0; i < output.size(); i++) {
7532 auto begin = input.begin() + i * m;
7533 auto end = input.begin() + (i + 1) * m;
7534 output[i] = std::accumulate(begin, end, init, op);
7535 if (op_name == "Mean") {
7536 output[i] /= m;
7537 }
7538 }
7539 return output;
7540 }
TEST_P(OpConverter_FP32_FP16_INT32_Test,ConvertReduce)7541 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertReduce) {
7542 {
7543 // Input is weights, should fail.
7544 Reset();
7545 const NodeDef node_def = CreateReduceOp<ops::Sum>(tf_type_, false);
7546 AddTestWeights<float>("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
7547 AddTestWeights<int32>("axis", {1}, {1});
7548 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
7549 "The input \"input\" for Sum must be a tensor");
7550 }
7551 {
7552 // Axis is weights, should fail.
7553 Reset();
7554 const NodeDef node_def = CreateReduceOp<ops::Sum>(tf_type_, false);
7555 AddTestTensor("input", {1, 2, 3}, {-3, -2, -1, 0, 1, 2});
7556 AddTestTensor("axis", {1}, DT_INT32, {1});
7557 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
7558 "The input \"axis\" for Sum must be a constant");
7559 }
7560 using OpFunc = std::function<NodeDef(DataType, bool)>;
7561 using ValFunc = float (*)(float, float);
7562 struct ReduceTestDescriptor {
7563 string name;
7564 OpFunc get_node;
7565 ValFunc val_func;
7566 float init_val;
7567 };
7568 std::vector<ReduceTestDescriptor> op_test_info{
7569 {"Sum", CreateReduceOp<ops::Sum>, [](float x, float y) { return x + y; },
7570 0},
7571 {"Prod", CreateReduceOp<ops::Prod>,
7572 [](float x, float y) { return x * y; }, 1},
7573 {"Mean", CreateReduceOp<ops::Mean>,
7574 [](float x, float y) { return x + y; }, 0},
7575 {"Min", CreateReduceOp<ops::Min>,
7576 [](float x, float y) { return y < x ? y : x; }, 1000},
7577 {"Max", CreateReduceOp<ops::Max>,
7578 [](float x, float y) { return x < y ? y : x; }, -1000}};
7579
7580 std::vector<float> input_values{1, 2, 3, 4, 5, 6};
7581 struct TestParams {
7582 std::vector<int> input_dims;
7583 std::vector<float> input_values;
7584 // Helper array contains the same elements as input but permuted in a way
7585 // that the reduction can be calculated over contiguous elements using
7586 // CalcReduce
7587 std::vector<float> helper_array;
7588 std::vector<int> axis;
7589 int stride; // product of input_dims along axis
7590 Status conversion_status;
7591 };
7592 std::vector<TestParams> params{
7593 // Out of range tests
7594 TestParams{{2, 3, 1}, input_values, input_values, {3}, 3},
7595 TestParams{{2, 3, 1}, input_values, input_values, {-4}, 3},
7596 // Ok tests
7597 TestParams{{2, 3, 1}, input_values, {1, 4, 2, 5, 3, 6}, {0}, 2},
7598 TestParams{{2, 3, 1}, input_values, input_values, {1}, 3},
7599 TestParams{{2, 3, 1}, input_values, input_values, {2}, 1},
7600 TestParams{{2, 3, 1}, input_values, input_values, {0, 1}, 6},
7601 // Ok tests with negative axis values
7602 TestParams{{2, 3, 1}, input_values, {1, 4, 2, 5, 3, 6}, {-3}, 2},
7603 TestParams{{2, 3, 1}, input_values, input_values, {-2}, 3},
7604 TestParams{{2, 3, 1}, input_values, input_values, {-1}, 1},
7605 TestParams{{2, 3, 1}, input_values, input_values, {-3, 1}, 6},
7606 };
7607
7608 for (bool keep_dims : {false, true}) {
7609 for (auto& op : op_test_info) {
7610 VLOG(2) << "Processing " << op.name << " with keep_dims=" << keep_dims;
7611 for (auto p : params) {
7612 SCOPED_TRACE(StrCat(op.name, keep_dims ? " & keep_dims" : ""));
7613 Reset();
7614 NodeDef node_def = op.get_node(tf_type_, keep_dims);
7615
7616 AddTestTensor("input", p.input_dims, p.input_values);
7617 AddTestWeights<int32>("axis", {static_cast<int>(p.axis.size())},
7618 p.axis);
7619 std::vector<int> expected_output_dims(p.input_dims);
7620
7621 // Set expected output dim and conversion error messages
7622 for (int ax : p.axis) {
7623 int rank = p.input_dims.size();
7624 if (ax >= rank || ax < -rank) {
7625 p.conversion_status =
7626 errors::InvalidArgument("Axis value of ", ax,
7627 " is out of bounds, must be in "
7628 "range [",
7629 -rank, ", ", rank, ")");
7630 } else {
7631 int ax_positive = ax >= 0 ? ax : ax + rank;
7632 // Zero marks elements that we will remove later.
7633 expected_output_dims[ax_positive] = keep_dims ? 1 : 0;
7634 if (trt_mode_ == TrtTestMode::kImplicitBatch &&
7635 (ax == 0 || ax == -rank)) {
7636 p.conversion_status = errors::Unimplemented(
7637 "TensorRT does not allow manipulation of the batch "
7638 "dimension");
7639 }
7640 }
7641 }
7642 expected_output_dims.erase(std::remove(expected_output_dims.begin(),
7643 expected_output_dims.end(), 0),
7644 expected_output_dims.end());
7645 VLOG(2) << "out dims "
7646 << absl::StrCat("[", absl::StrJoin(expected_output_dims, ","),
7647 "]");
7648 std::vector<float> expected_values = CalcReduce(
7649 op.name, p.helper_array, p.stride, op.val_func, op.init_val);
7650
7651 if (tf_type_ == DT_INT32) {
7652 // We need to floor the float values in the `expected_values` vector.
7653 std::for_each(expected_values.begin(), expected_values.end(),
7654 [](float& _n) { _n = std::floor(_n); });
7655 }
7656
7657 TestOpConverter("my_reduce", node_def, expected_output_dims,
7658 p.conversion_status, Status::OK(),
7659 ArrayFloatNear(expected_values));
7660 }
7661 }
7662 }
7663 }
7664
CreateCastOp(DataType tf_type)7665 NodeDef CreateCastOp(DataType tf_type) {
7666 Scope s = Scope::NewRootScope();
7667 auto input = ops::Placeholder(s.WithOpName("input"), DT_HALF);
7668 return ops::Cast(s.WithOpName("my_unary"), input, DT_FLOAT)
7669 .operation.node()
7670 ->def();
7671 }
7672
TEST_P(OpConverter_FP32_UnaryTest,ConvertUnary)7673 TEST_P(OpConverter_FP32_UnaryTest, ConvertUnary) {
7674 using OpFunc = std::function<NodeDef(DataType)>;
7675 using ValFunc = float (*)(float);
7676 std::map<std::string, std::pair<OpFunc, ValFunc>> op_map;
7677 #define ADD_OP(name, op, compute) \
7678 op_map[name] = \
7679 std::make_pair(CreateUnaryOp<op>, static_cast<ValFunc>(compute))
7680 ADD_OP("Abs", ops::Abs, std::abs);
7681 ADD_OP("Acos", ops::Acos, std::acos);
7682 ADD_OP("Acosh", ops::Acosh, std::acosh);
7683 ADD_OP("Asin", ops::Asin, std::asin);
7684 ADD_OP("Asinh", ops::Asinh, std::asinh);
7685 ADD_OP("Atan", ops::Atan, std::atan);
7686 ADD_OP("Atanh", ops::Atanh, std::atanh);
7687 op_map["Cast"] = std::make_pair(CreateCastOp, [](float x) { return x; });
7688 ADD_OP("Ceil", ops::Ceil, std::ceil);
7689 ADD_OP("Cos", ops::Cos, std::cos);
7690 ADD_OP("Cosh", ops::Cosh, std::cosh);
7691 ADD_OP("Exp", ops::Exp, std::exp);
7692 ADD_OP("Erf", ops::Erf, std::erf);
7693 ADD_OP("Floor", ops::Floor, std::floor);
7694 ADD_OP("Log", ops::Log, std::log);
7695 ADD_OP("Neg", ops::Neg, [](float x) { return -x; });
7696 ADD_OP("Reciprocal", ops::Reciprocal, [](float x) { return 1.0f / x; });
7697 #if IS_TRT_VERSION_GE(8, 2, 0, 0)
7698 ADD_OP("Round", ops::Round, [](float x) { return (float)std::round(x); });
7699 ADD_OP("Sign", ops::Sign,
7700 [](float x) { return x > 0 ? 1.0f : (x < 0 ? -1.0f : 0.0f); });
7701 #endif
7702 ADD_OP("Rsqrt", ops::Rsqrt, [](float x) { return 1.0f / std::sqrt(x); });
7703 ADD_OP("Sin", ops::Sin, std::sin);
7704 ADD_OP("Sinh", ops::Sinh, std::sinh);
7705 ADD_OP("Sqrt", ops::Sqrt, std::sqrt);
7706 ADD_OP("Tan", ops::Tan, std::tan);
7707 #undef ADD_OP
7708
7709 std::vector<float> input_values{-0.9f, 0.6f, 0.0f, -3.5f, 100.0f, 2.9f};
7710 RunTests("Unary", *UnaryOperationMap(), op_map, input_values, "x");
7711 }
7712
TEST_P(OpConverter_BOOL_Test,ConvertBoolean)7713 TEST_P(OpConverter_BOOL_Test, ConvertBoolean) {
7714 std::vector<int> input_values{1, 0, 1, 0, 0, 1};
7715 using OpFunc = std::function<NodeDef(DataType)>;
7716
7717 using ValFunc = int (*)(int);
7718 std::map<std::string, std::pair<OpFunc, ValFunc>> op_map;
7719 #define ADD_OP(name, op, compute) \
7720 op_map[name] = \
7721 std::make_pair(CreateUnaryOp<op>, static_cast<ValFunc>(compute))
7722 ADD_OP("LogicalNot", ops::LogicalNot, [](int x) { return 1 - x; });
7723 #undef ADD_OP
7724
7725 #if IS_TRT_VERSION_GE(8, 2, 0, 0)
7726 // The test does not actually run for TPT versions less than 8.2
7727 RunTests("LogicalUnary", *UnaryBooleanOperationMap(), op_map, input_values,
7728 "x");
7729 #endif
7730 }
7731
7732 // Get the NodeDef for ConcatV2.
7733 // TODO(hinsu): Consider switching this to static function.
__anond617510d3a02(DataType dtype, int num_inputs) 7734 auto get_concat_nodedef = [](DataType dtype, int num_inputs) -> NodeDef {
7735 Scope s = Scope::NewRootScope();
7736 std::vector<Input> values;
7737 values.reserve(num_inputs);
7738 for (int i = 0; i < num_inputs; ++i) {
7739 const string input_name = StrCat("values_", i);
7740 values.push_back(ops::Placeholder(s.WithOpName(input_name), dtype));
7741 }
7742 auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
7743 auto concat = ops::Concat(s.WithOpName("my_concat"),
7744 absl::Span<const Input>(values), axis);
7745 return concat.operation.node()->def();
7746 };
7747
TEST_P(OpConverter_FP32_FP16_INT32_Test,ConvertConcat)7748 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertConcat) {
7749 {
7750 // Axis is a tensor, should fail.
7751 Reset();
7752 NodeDef node_def = get_concat_nodedef(tf_type_, 2);
7753 AddTestTensor("values_0", {1, 1, 2, 3});
7754 AddTestTensor("values_1", {1, 1, 2, 3});
7755 AddTestTensor("axis", {1});
7756 RunValidationAndConversion(
7757 node_def, error::UNIMPLEMENTED,
7758 "The input \"axis\" for ConcatV2 must be a constant");
7759 }
7760 {
7761 // Axis is out of bounds, should fail.
7762 Reset();
7763 NodeDef node_def = get_concat_nodedef(tf_type_, 2);
7764 AddTestTensor("values_0", {1, 1, 2, 3});
7765 AddTestTensor("values_1", {1, 1, 2, 3});
7766 AddTestWeights<int32>("axis", {1}, {4});
7767 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
7768 "Axis value of 4 is out of bounds, must be in "
7769 "range [-4, 4)");
7770 }
7771 {
7772 // Inputs have inconsistent ranks, should fail.
7773 Reset();
7774 NodeDef node_def = get_concat_nodedef(tf_type_, 2);
7775 AddTestTensor("values_0", {1, 1, 2, 3});
7776 AddTestTensor("values_1", {1, 1, 6});
7777 AddTestWeights<int32>("axis", {1}, {1});
7778 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
7779 "Received inputs with inconsistent rank");
7780 }
7781
7782 struct TestParams {
7783 std::vector<std::vector<int>> input_shapes;
7784 std::vector<std::vector<int>> input_values;
7785 std::vector<bool> inputs_are_tensors;
7786 int axis;
7787 std::vector<int> expected_output_dims;
7788 std::vector<int> expected_output;
7789 Status conversion_status;
7790 Status run_status;
7791 };
7792
7793 const std::vector<std::vector<int>> common_input{CreateVectorIota<int>(6),
7794 CreateVectorIota<int>(6, 6)};
7795
7796 std::vector<TestParams> params = {
7797 {
7798 /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
7799 /*input_values=*/common_input,
7800 /*inputs_are_tensors=*/{true, true},
7801 /*axis=*/1,
7802 /*expected_output_dims=*/{1, 2, 2, 3},
7803 /*expected_output=*/CreateVectorIota<int>(12),
7804 },
7805 {
7806 /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
7807 /*input_values=*/common_input,
7808 /*inputs_are_tensors=*/{true, true},
7809 /*axis=*/2,
7810 /*expected_output_dims=*/{1, 1, 4, 3},
7811 /*expected_output=*/CreateVectorIota<int>(12),
7812 },
7813 {
7814 /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
7815 /*input_values=*/common_input,
7816 /*inputs_are_tensors=*/{true, true},
7817 /*axis=*/3,
7818 /*expected_output_dims=*/{1, 1, 2, 6},
7819 /*expected_output=*/
7820 {0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11},
7821 },
7822 {
7823 /*input_shapes=*/{{1, 1}, {1, 2}, {1, 3}, {1, 1}, {1, 1}, {1, 2}},
7824 /*input_values=*/
7825 {{1}, {2, 3}, {4, 5, 6}, {7}, {8}, {9, 10}},
7826 /*inputs_are_tensors=*/{true, true, true, true, true, true},
7827 /*axis=*/1,
7828 /*expected_output_dims=*/{1, 10},
7829 /*expected_output=*/
7830 CreateVectorIota<int>(10, /*start_value=*/1),
7831 },
7832 {
7833 // An input is a weight
7834 /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
7835 /*input_values=*/common_input,
7836 /*inputs_are_tensors=*/{true, false},
7837 /*axis=*/1,
7838 /*expected_output_dims=*/{1, 2, 2, 3},
7839 /*expected_output=*/CreateVectorIota<int>(12),
7840 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7841 ? errors::Unimplemented(
7842 "The input \"values_1\" for ConcatV2 must be a tensor")
7843 : Status::OK(),
7844 /*run_status=*/Status::OK(),
7845 },
7846 {
7847 // An input is a weight
7848 /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
7849 /*input_values=*/common_input,
7850 /*inputs_are_tensors=*/{false, false},
7851 /*axis=*/1,
7852 /*expected_output_dims=*/{1, 2, 2, 3},
7853 /*expected_output=*/CreateVectorIota<int>(12),
7854 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7855 ? errors::Unimplemented(
7856 "The input \"values_0\" for ConcatV2 must be a tensor")
7857 : Status::OK(),
7858 /*run_status=*/Status::OK(),
7859 },
7860 {
7861 // Axis is batch dimension, should fail in implicit batch mode.
7862 /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 2, 3}},
7863 /*input_values=*/common_input,
7864 /*inputs_are_tensors=*/{true, true},
7865 /*axis=*/0,
7866 /*expected_output_dims=*/{2, 1, 2, 3},
7867 /*expected_output=*/CreateVectorIota<int>(12),
7868 /*conversion_status=*/trt_mode_ == TrtTestMode::kImplicitBatch
7869 ? errors::Unimplemented(
7870 "TensorRT does not allow manipulation of the "
7871 "batch dimension")
7872 : Status::OK(),
7873 },
7874 {
7875 // Inconsistent input shape, runtime error in dynamic shape mode.
7876 /*input_shapes=*/{{1, 1, 2, 3}, {1, 1, 3, 2}},
7877 /*input_values=*/common_input,
7878 /*inputs_are_tensors=*/{true, true},
7879 /*axis=*/1,
7880 /*expected_output_dims=*/{2, 1, 2, 3},
7881 /*expected_output=*/CreateVectorIota<int>(12),
7882 trt_mode_ != TrtTestMode::kDynamicShape
7883 ? errors::InvalidArgument(
7884 "Received inputs with inconsistent shape")
7885 : Status::OK(),
7886 errors::InvalidArgument(""),
7887 }};
7888
7889 for (auto p : params) {
7890 Reset();
7891 const int num_inputs = p.input_shapes.size();
7892 EXPECT_EQ(num_inputs, p.input_values.size());
7893
7894 NodeDef node_def = get_concat_nodedef(tf_type_, num_inputs);
7895
7896 // Create inputs.
7897 for (int j = 0; j < num_inputs; ++j) {
7898 string name = StrCat("values_", j);
7899
7900 if (!p.inputs_are_tensors[j]) {
7901 AddTestWeights(name, p.input_shapes[j], p.input_values[j], tf_type_);
7902 } else {
7903 AddTestTensor(name, p.input_shapes[j], p.input_values[j]);
7904 }
7905 }
7906 AddTestWeights<int32>("axis", {1}, {p.axis});
7907
7908 TestOpConverter("my_concat", node_def, p.expected_output_dims,
7909 p.conversion_status, p.run_status,
7910 ElementsAreArray(p.expected_output));
7911 }
7912 }
7913
7914 // Get the NodeDef for Split.
__anond617510d3b02(DataType dtype, int num_split) 7915 auto get_split_nodedef = [](DataType dtype, int num_split) -> NodeDef {
7916 Scope s = Scope::NewRootScope();
7917 auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32);
7918 auto value = ops::Placeholder(s.WithOpName("value"), dtype);
7919 auto split = ops::Split(s.WithOpName("my_split"), axis, value, num_split);
7920 return split.operation.node()->def();
7921 };
7922
7923 template <DataType dtype>
TestConvertSplit(OpConverterTest * test)7924 void TestConvertSplit(OpConverterTest* test) {
7925 typedef typename EnumToDataType<dtype>::Type CType;
7926
7927 struct TestParams {
7928 std::vector<int> input_shape;
7929 std::vector<CType> value;
7930 int axis;
7931 int num_split;
7932 std::vector<int> expected_output_dims;
7933 std::vector<std::vector<CType>> expected_outputs;
7934 };
7935
7936 const std::vector<CType> common_input = CreateVectorIota<CType>(6);
7937 std::vector<TestParams> ok_params = {
7938 // Identity (num_split = 1)
7939 {/*input_shape=*/{1, 2, 3}, /*value=*/common_input, /*axis=*/1,
7940 /*num_split=*/1, /*expected_output_dims=*/{1, 2, 3},
7941 /*expected_outputs=*/{CreateVectorIota<CType>(6)}},
7942 {/*input_shape=*/{1, 2, 3},
7943 /*value=*/common_input,
7944 /*axis=*/3,
7945 /*num_split=*/3,
7946 /*expected_output_dims=*/{1, 2, 1},
7947 /*expected_outputs=*/
7948 {{CType(0), CType(3)}, {CType(1), CType(4)}, {CType(2), CType(5)}}},
7949 {/*input_shape=*/{1, 6},
7950 /*value=*/common_input,
7951 /*axis=*/2,
7952 /*num_split=*/6,
7953 /*expected_output_dims=*/{1, 1},
7954 /*expected_outputs=*/
7955 {{CType(0)},
7956 {CType(1)},
7957 {CType(2)},
7958 {CType(3)},
7959 {CType(4)},
7960 {CType(5)}}},
7961 {/*input_shape=*/{1, 6},
7962 /*value=*/common_input,
7963 /*axis=*/-1,
7964 /*num_split=*/2,
7965 /*expected_output_dims=*/{1, 3},
7966 /*expected_outputs=*/
7967 {CreateVectorIota<CType>(3), CreateVectorIota<CType>(3, CType(3))}},
7968 };
7969
7970 for (int i = 0; i < ok_params.size(); ++i) {
7971 test->Reset();
7972 NodeDef node_def = get_split_nodedef(dtype, ok_params[i].num_split);
7973 // Create inputs.
7974 test->AddTestWeights<int32>("axis", {1}, {ok_params[i].axis});
7975 nvinfer1::DataType trt_type;
7976 TF_ASSERT_OK(TfTypeToTrtType(dtype, &trt_type));
7977 test->AddTestTensor("value", ok_params[i].input_shape, 1, trt_type);
7978 // Convert.
7979 test->RunValidationAndConversion(node_def);
7980
7981 // Get output tensors and verify output dims.
7982 EXPECT_EQ(ok_params[i].expected_outputs.size(), ok_params[i].num_split);
7983 std::vector<TRT_TensorOrWeights> outputs(ok_params[i].num_split);
7984 DataVec output_data;
7985 for (int j = 0; j < outputs.size(); ++j) {
7986 const string name = j == 0 ? StrCat("my_split") : StrCat("my_split:", j);
7987 TF_EXPECT_OK(test->GetTensorOrWeights(name, &outputs[j]));
7988 EXPECT_TRUE(outputs[j].is_tensor());
7989 EXPECT_THAT(outputs[j].tensor()->getDimensions(),
7990 DimsAreArray(ok_params[i].expected_output_dims));
7991 // Create buffer to store output.
7992 output_data.push_back(
7993 {name, test->ConstructTensor<CType>(
7994 ok_params[i].expected_outputs[j].size())});
7995 }
7996
7997 // Verify output values are correct.
7998 const DataVec input_data{
7999 {"value", test->AsTensor<CType>(ok_params[i].value)}};
8000 TF_EXPECT_OK(test->BuildAndRun(input_data, &output_data));
8001 for (int j = 0; j < outputs.size(); ++j) {
8002 EXPECT_THAT(GetSpanForData<CType>(output_data[j]),
8003 ElementsAreArray(ok_params[i].expected_outputs[j]));
8004 }
8005 }
8006 }
8007
TEST_F(OpConverterTest,ConvertSplit)8008 TEST_F(OpConverterTest, ConvertSplit) {
8009 {
8010 // Axis is a tensor, should fail.
8011 Reset();
8012 NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
8013 AddTestTensor("axis", {1});
8014 AddTestTensor("value", {1, 2, 3});
8015 RunValidationAndConversion(
8016 node_def, error::UNIMPLEMENTED,
8017 "The input \"axis\" for Split must be a constant");
8018 }
8019 {
8020 // Axis is out of bounds, should fail.
8021 Reset();
8022 NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
8023 AddTestWeights<int32>("axis", {1}, {4});
8024 AddTestTensor("value", {1, 2, 3});
8025 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
8026 "Axis value of 4 is out of bounds, must be in "
8027 "range [-4, 4)");
8028 }
8029 {
8030 // Axis is out of bounds (negative), should fail.
8031 Reset();
8032 NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
8033 AddTestWeights<int32>("axis", {1}, {-5});
8034 AddTestTensor("value", {1, 2, 3});
8035 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
8036 "Axis value of -5 is out of bounds, must be in "
8037 "range [-4, 4)");
8038 }
8039 {
8040 // Axis is batch dimension, should fail.
8041 Reset();
8042 NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
8043 AddTestWeights<int32>("axis", {1}, {0});
8044 AddTestTensor("value", {1, 2, 3});
8045 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
8046 "TensorRT does not allow manipulation of the "
8047 "batch dimension");
8048 }
8049 {
8050 // Value is a weight, should fail.
8051 Reset();
8052 NodeDef node_def = get_split_nodedef(DT_FLOAT, 1);
8053 AddTestWeights<int32>("axis", {1}, {1});
8054 AddTestWeights<float>("value", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
8055 RunValidationAndConversion(
8056 node_def, error::UNIMPLEMENTED,
8057 "The input \"value\" for Split must be a tensor");
8058 }
8059 {
8060 // Dim is not evenly divisibly by num_split, should fail.
8061 Reset();
8062 NodeDef node_def = get_split_nodedef(DT_FLOAT, 2);
8063 AddTestWeights<int32>("axis", {1}, {3});
8064 AddTestTensor("value", {1, 2, 3});
8065 RunValidationAndConversion(
8066 node_def, error::INVALID_ARGUMENT,
8067 "Dimension 3 of size 3 is not evenly divisible by 2");
8068 }
8069 {
8070 // num_split > dim size, should fail.
8071 Reset();
8072 NodeDef node_def = get_split_nodedef(DT_FLOAT, 4);
8073 AddTestWeights<int32>("axis", {1}, {3});
8074 AddTestTensor("value", {1, 2, 3});
8075 RunValidationAndConversion(
8076 node_def, error::INVALID_ARGUMENT,
8077 "Dimension 3 of size 3 is not evenly divisible by 4");
8078 }
8079
8080 TestConvertSplit<DT_FLOAT>(this);
8081 TestConvertSplit<DT_HALF>(this);
8082 TestConvertSplit<DT_INT32>(this);
8083 }
8084
8085 // Get the NodeDef for Unpack (Unstack in TF API).
__anond617510d3c02(DataType dtype, int num, int axis) 8086 auto get_unpack_nodedef = [](DataType dtype, int num, int axis) -> NodeDef {
8087 Scope s = Scope::NewRootScope();
8088 auto value = ops::Placeholder(s.WithOpName("value"), dtype);
8089 auto unstack_attrs = ops::Unstack::Axis(axis);
8090 auto unstack =
8091 ops::Unstack(s.WithOpName("my_unpack"), value, num, unstack_attrs);
8092 return unstack.operation.node()->def();
8093 };
8094
8095 struct UnpackTestParams {
8096 std::vector<int> input_shape;
8097 std::vector<float> input_value;
8098 int axis;
8099 int num;
8100 std::vector<int> expected_output_dims;
8101 std::vector<std::vector<float>> expected_outputs;
8102 Status run_status;
8103 };
8104
TestConvertUnpack(ParameterizedOpConverterTestBase * test,UnpackTestParams & p)8105 void TestConvertUnpack(ParameterizedOpConverterTestBase* test,
8106 UnpackTestParams& p) {
8107 test->Reset();
8108 NodeDef node_def = get_unpack_nodedef(test->get_tf_type(), p.num, p.axis);
8109 // Create inputs.
8110 test->AddTestTensor("value", p.input_shape, test->get_tf_type(),
8111 p.input_value);
8112
8113 std::vector<Matcher<std::vector<float>>> matcher_vec;
8114 std::vector<DataType> datatype_vec;
8115 std::vector<std::vector<int>> expected_output_dims;
8116
8117 for (int j = 0; j < p.expected_outputs.size(); ++j) {
8118 matcher_vec.push_back(ElementsAreArray(p.expected_outputs[j]));
8119 datatype_vec.push_back(test->get_tf_type());
8120 expected_output_dims.push_back(p.expected_output_dims);
8121 }
8122
8123 test->TestOpConverterMultiOut(/*name=*/"my_unpack",
8124 /*node_def=*/node_def,
8125 /*expected_output_dims=*/expected_output_dims,
8126 /*expected_conversion_status=*/p.run_status,
8127 /*expected_runtime_status=*/p.run_status,
8128 /*matcher=*/matcher_vec,
8129 /*out_tf_type=*/datatype_vec);
8130 }
8131
8132 // TODO: Reactivate when INT32 Segfault fixed
TEST_P(OpConverter_FP32_FP16_INT32_Test,ConvertUnpack)8133 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertUnpack) {
8134 // We need to skip error testing for Dynamic Shape mode, as it is impossible
8135 // to convert Unpack in Dynamic Shape Mode.
8136 if (trt_mode_ != TrtTestMode::kDynamicShape) {
8137 {
8138 // Value is weights, should fail.
8139 Reset();
8140 NodeDef node_def = get_unpack_nodedef(tf_type_, /*num=*/3, /*axis=*/3);
8141 AddTestWeights<float>("value", {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6});
8142 RunValidationAndConversion(
8143 node_def, error::UNIMPLEMENTED,
8144 "The input \"value\" for Unpack must be a tensor");
8145 }
8146 {
8147 // Axis is out of bounds, should fail.
8148 Reset();
8149 NodeDef node_def = get_unpack_nodedef(tf_type_, /*num=*/1, /*axis=*/4);
8150 AddTestTensor("value", {1, 1, 2, 3});
8151 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
8152 "Axis value of 4 is out of bounds, must be in "
8153 "range [-4, 4)");
8154 }
8155 {
8156 // Axis is out of bounds (negative), should fail.
8157 Reset();
8158 NodeDef node_def = get_unpack_nodedef(tf_type_, /*num=*/1, /*axis=*/-5);
8159 AddTestTensor("value", {1, 1, 2, 3});
8160 RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
8161 "Axis value of -5 is out of bounds, must be "
8162 "in range [-4, 4)");
8163 }
8164 {
8165 if (trt_mode_ != TrtTestMode::kExplicitBatch) {
8166 // Axis is batch dimension, should fail.
8167 Reset();
8168 NodeDef node_def = get_unpack_nodedef(tf_type_, /*num=*/1, /*axis=*/0);
8169 AddTestTensor("value", {1, 2, 3});
8170 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
8171 "TensorRT does not allow manipulation of "
8172 "the batch dimension");
8173 }
8174 }
8175 {
8176 // Dim size does not match num, should fail.
8177 Reset();
8178 NodeDef node_def = get_unpack_nodedef(tf_type_, /*num=*/5, /*axis=*/2);
8179 AddTestTensor("value", {1, 1, 6});
8180 RunValidationAndConversion(
8181 node_def, error::INVALID_ARGUMENT,
8182 "Dimension 2 has size 6 which is not equal to num of 5");
8183 }
8184 {
8185 // Output would be TF scalar, should fail.
8186 Reset();
8187 NodeDef node_def = get_unpack_nodedef(tf_type_, /*num=*/1, /*axis=*/0);
8188 AddTestTensor(
8189 "value", {}, tf_type_, {}, {},
8190 trt_mode_ == TrtTestMode::kImplicitBatch
8191 ? errors::InvalidArgument(
8192 "removing first dim requires explicit batch dimension")
8193 : Status::OK());
8194 if (trt_mode_ == TrtTestMode::kImplicitBatch) {
8195 RunValidationAndConversion(
8196 node_def, error::INTERNAL,
8197 "Failed to convert at least one input to a TRT_TensorOrWeights: "
8198 "Scalar input tensor is not supported since the first dimension is "
8199 "treated as batch dimension by TRT");
8200 } else {
8201 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
8202 "Input \"value\" for Unpack must be rank 2 "
8203 "or greater");
8204 }
8205 }
8206 }
8207
8208 const std::vector<float> common_input = CreateVectorIota<float>(6);
8209
8210 Status run_status =
8211 trt_mode_ == TrtTestMode::kDynamicShape
8212 ? errors::InvalidArgument(
8213 "The argument `strided_slice_spec` is "
8214 "`std::nullopt` with `dynamic_input_size_indices` non empty.")
8215 : Status::OK();
8216
8217 std::vector<UnpackTestParams> params = {
8218 {/*input_shape=*/{1, 1, 2, 1, 3, 1},
8219 /*input_value=*/common_input,
8220 /*axis=*/4,
8221 /*num=*/3,
8222 /*expected_output_dims=*/{1, 1, 2, 1, 1},
8223 /*expected_outputs=*/{{0, 3}, {1, 4}, {2, 5}},
8224 /*run_status=*/run_status},
8225 {/*input_shape=*/{1, 1, 2, 1, 3},
8226 /*input_value=*/common_input,
8227 /*axis=*/4,
8228 /*num=*/3,
8229 /*expected_output_dims=*/{1, 1, 2, 1},
8230 /*expected_outputs=*/{{0, 3}, {1, 4}, {2, 5}},
8231 /*run_status=*/run_status},
8232 {/*input_shape=*/{1, 1, 2, 3},
8233 /*input_value=*/common_input,
8234 /*axis=*/1,
8235 /*num=*/1,
8236 /*expected_output_dims=*/{1, 2, 3},
8237 /*expected_outputs=*/{CreateVectorIota<float>(6)},
8238 /*run_status=*/run_status},
8239 {/*input_shape=*/{1, 6, 1},
8240 /*input_value=*/common_input,
8241 /*axis=*/-2,
8242 /*num=*/6,
8243 /*expected_output_dims=*/{1, 1},
8244 /*expected_outputs=*/{{0}, {1}, {2}, {3}, {4}, {5}},
8245 /*run_status=*/run_status},
8246 {/*input_shape=*/{1, 6},
8247 /*input_value=*/common_input,
8248 /*axis=*/1,
8249 /*num=*/6,
8250 /*expected_output_dims=*/{1},
8251 /*expected_outputs=*/{{0}, {1}, {2}, {3}, {4}, {5}},
8252 /*run_status=*/run_status},
8253 };
8254 for (auto p : params) {
8255 TestConvertUnpack(this, p);
8256 }
8257 }
8258
8259 // Get the NodeDef for Pack.
GetPackNodeDef(DataType dtype,int num_inputs,int axis)8260 NodeDef GetPackNodeDef(DataType dtype, int num_inputs, int axis) {
8261 Scope s = Scope::NewRootScope();
8262 std::vector<Input> values;
8263 values.reserve(num_inputs);
8264 for (int i = 0; i < num_inputs; ++i) {
8265 const string input_name = StrCat("values_", i);
8266 values.push_back(ops::Placeholder(s.WithOpName(input_name), dtype));
8267 }
8268 // Pack op is renamed to Stack in APIs.
8269 auto pack =
8270 ops::Stack(s.WithOpName("my_pack"), absl::Span<const Input>(values),
8271 ops::Stack::Axis(axis));
8272 return pack.operation.node()->def();
8273 }
8274
TEST_P(OpConverter_FP32_FP16_INT32_Test,ConvertPack)8275 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertPack) {
8276 struct TestParams {
8277 std::vector<std::vector<int>> input_shapes;
8278 std::vector<std::vector<int>> partial_input_shapes;
8279 std::vector<std::vector<float>> input_values;
8280 int axis;
8281 std::vector<int> expected_output_dims;
8282 std::vector<float> expected_output;
8283 Status conversion_status;
8284 Status runtime_status;
8285 bool input_1_is_weight;
8286 };
8287
8288 const std::vector<std::vector<float>> common_input{
8289 CreateVectorIota<float>(6),
8290 CreateVectorIota<float>(6, /*start_value=*/6)};
8291 std::vector<TestParams> params = {
8292 // Second input is weight, should fail in implicit batch mode
8293 {/*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
8294 /*partial_input_shapes=*/{{}, {}},
8295 /*input_values=*/common_input,
8296 /*axis=*/1,
8297 /*expected_output_dims=*/{1, 2, 2, 3},
8298 /*expected_output=*/CreateVectorIota<float>(12),
8299 trt_mode_ == TrtTestMode::kImplicitBatch
8300 ? Status{error::UNIMPLEMENTED,
8301 "The input \"values_1\" for Pack must be a tensor"}
8302 : Status::OK(),
8303 /*runtime_status*/ Status::OK(),
8304 /*weight_input*/ true},
8305 // Axis is out of bounds, should fail.
8306 {
8307 /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
8308 /*partial_input_shapes=*/{{}, {}},
8309 /*input_values=*/common_input,
8310 /*axis=*/-5,
8311 /*expected_output_dims=*/{},
8312 /*expected_output=*/{},
8313 Status{error::INVALID_ARGUMENT,
8314 "Axis value of -5 is out of bounds, must be in"
8315 " range [-4, 4)"},
8316 },
8317 // Axis is batch dimension, should fail in implicit batch mode.
8318 {/*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
8319 /*partial_input_shapes=*/{{}, {}},
8320 /*input_values=*/common_input,
8321 /*axis=*/-4,
8322 /*expected_output_dims=*/{2, 1, 2, 3},
8323 /*expected_output=*/CreateVectorIota<float>(12),
8324 trt_mode_ == TrtTestMode::kImplicitBatch
8325 ? Status{error::UNIMPLEMENTED,
8326 "TensorRT does not allow manipulation of the batch "
8327 "dimension"}
8328 : Status::OK()},
8329 // Inconsistent rank, should fail.
8330 {
8331 /*input_shapes=*/{{1, 2, 3}, {1, 6}},
8332 /*partial_input_shapes=*/{{}, {}},
8333 /*input_values=*/common_input,
8334 /*axis=*/1,
8335 /*expected_output_dims=*/{},
8336 /*expected_output=*/{},
8337 Status{error::INVALID_ARGUMENT,
8338 "Received inputs with inconsistent rank"},
8339 },
8340 {
8341 /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
8342 /*partial_input_shapes=*/{{}, {}},
8343 /*input_values=*/common_input,
8344 /*axis=*/1,
8345 /*expected_output_dims=*/{1, 2, 2, 3},
8346 /*expected_output=*/CreateVectorIota<float>(12),
8347 },
8348 {
8349 /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
8350 /*partial_input_shapes=*/{{}, {}},
8351 /*input_values=*/common_input,
8352 /*axis=*/2,
8353 /*expected_output_dims=*/{1, 2, 2, 3},
8354 /*expected_output=*/
8355 {0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11},
8356 },
8357 {
8358 /*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
8359 /*partial_input_shapes=*/{{}, {}},
8360 /*input_values=*/common_input,
8361 /*axis=*/3,
8362 /*expected_output_dims=*/{1, 2, 3, 2},
8363 /*expected_output=*/
8364 {0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11},
8365 },
8366 {
8367 /*input_shapes=*/{{1, 2, 3}},
8368 /*partial_input_shapes=*/{{}},
8369 /*input_values=*/{CreateVectorIota<float>(6)},
8370 /*axis=*/1,
8371 /*expected_output_dims=*/{1, 1, 2, 3},
8372 /*expected_output=*/CreateVectorIota<float>(6),
8373 },
8374 {
8375 /*input_shapes=*/{{1, 2, 3}},
8376 /*partial_input_shapes=*/{{}},
8377 /*input_values=*/{CreateVectorIota<float>(6)},
8378 /*axis=*/2,
8379 /*expected_output_dims=*/{1, 2, 1, 3},
8380 /*expected_output=*/CreateVectorIota<float>(6),
8381 },
8382 };
8383 // Inputs have inconsistent shapes, should fail.
8384 if (trt_mode_ != TrtTestMode::kDynamicShape) {
8385 params.push_back(
8386 TestParams{/*input_shapes=*/{{1, 2, 3}, {1, 3, 2}},
8387 /*partial_input_shapes=*/{{}, {}},
8388 /*input_values=*/common_input,
8389 /*axis=*/1,
8390 /*expected_output_dims=*/{},
8391 /*expected_output=*/CreateVectorIota<float>(12),
8392 Status{error::INVALID_ARGUMENT,
8393 "Received inputs with inconsistent shape"}});
8394 } else {
8395 // In dynamic shape mode we cannot catch inconsistent shapes at conversion
8396 // time, only during runtime. But TensorRT does not raise a proper runtime
8397 // error, instead it aborts the program with the following message:
8398 // Assertion failed: t->start.d[i] + t->extent.d[i] <= r.dims.d[i]
8399 // ../builder/cudnnBuilderGraph.cpp:862
8400 // Aborting...
8401 // TODO(tfeher) Add dynamic shapes test once TRT handles shape error
8402 // decently
8403 }
8404 if (trt_mode_ == TrtTestMode::kDynamicShape) {
8405 // Test with mixed dynamic / static shape input tensors
8406 params.push_back(
8407 TestParams{/*input_shapes=*/{{1, 2, 3}, {1, 2, 3}},
8408 /*partial_input_shapes=*/{{-1, -1, -1}, {1, 2, 3}},
8409 /*input_values=*/common_input,
8410 /*axis=*/2,
8411 /*expected_output_dims=*/{1, 2, 2, 3},
8412 /*expected_output=*/
8413 {0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11}});
8414 }
8415 for (auto p : params) {
8416 Reset();
8417 const int num_inputs = p.input_shapes.size();
8418 EXPECT_EQ(num_inputs, p.input_values.size());
8419
8420 NodeDef node_def = GetPackNodeDef(tf_type_, num_inputs, p.axis);
8421 // Create inputs.
8422 for (int j = 0; j < num_inputs; ++j) {
8423 if (j == 1 && p.input_1_is_weight) {
8424 AddTestWeights(StrCat("values_", j), p.input_shapes[j],
8425 p.input_values[j], tf_type_);
8426 } else {
8427 AddTestTensor(StrCat("values_", j), p.input_shapes[j], tf_type_,
8428 p.input_values[j], p.partial_input_shapes[j]);
8429 }
8430 }
8431 TestOpConverter("my_pack", node_def, p.expected_output_dims,
8432 p.conversion_status, p.runtime_status,
8433 ElementsAreArray(p.expected_output));
8434 }
8435 }
8436
8437 // Get the NodeDef for ArgMin or ArgMax.
8438 template <typename OpType>
GetArgMinMaxNodeDef(DataType input_dtype,DataType output_dtype)8439 NodeDef GetArgMinMaxNodeDef(DataType input_dtype, DataType output_dtype) {
8440 Scope s = Scope::NewRootScope();
8441 auto input = ops::Placeholder(s.WithOpName("input"), input_dtype);
8442 auto dimension = ops::Placeholder(s.WithOpName("dimension"), DT_INT32);
8443 auto attrs = OpType::OutputType(output_dtype);
8444 auto arg = OpType(s.WithOpName("my_arg"), input, dimension, attrs);
8445 return arg.operation.node()->def();
8446 }
8447
8448 struct ArgMinMaxTestParams {
8449 std::vector<int> input_shape;
8450 std::vector<float> input_value;
8451 int axis;
8452 std::vector<int> expected_output_dims;
8453 std::vector<int> expected_argmax_output;
8454 std::vector<int> expected_argmin_output;
8455 Status status;
8456 };
8457
8458 template <typename OpType>
TestConvertArgMinMax(ParameterizedOpConverterTestBase * test,DataType _tf_type,ArgMinMaxTestParams & p)8459 void TestConvertArgMinMax(ParameterizedOpConverterTestBase* test,
8460 DataType _tf_type, ArgMinMaxTestParams& p) {
8461 test->Reset();
8462
8463 NodeDef node_def = GetArgMinMaxNodeDef<OpType>(_tf_type,
8464 /*output_dtype=*/DT_INT32);
8465
8466 std::vector<int> expected_out;
8467 if (node_def.op() == "ArgMax") {
8468 expected_out = p.expected_argmax_output;
8469 } else if (node_def.op() == "ArgMin") {
8470 expected_out = p.expected_argmin_output;
8471 } else {
8472 ASSERT_TRUE(false);
8473 }
8474
8475 test->AddTestTensor("input", p.input_shape, _tf_type, p.input_value);
8476 test->AddTestWeights("dimension", {1}, {p.axis}, DT_INT32);
8477
8478 test->TestOpConverter("my_arg", node_def, p.expected_output_dims,
8479 /*expected_conversion_status=*/p.status,
8480 /*expected_runtime_status=*/Status::OK(),
8481 /*matcher=*/ElementsAreArray(expected_out), {DT_INT32});
8482 }
8483
TEST_P(OpConverter_FP32_FP16_Test,ConvertArgMinMax)8484 TEST_P(OpConverter_FP32_FP16_Test, ConvertArgMinMax) {
8485 {
8486 // Dimension is a tensor, should fail.
8487 Reset();
8488 NodeDef node_def =
8489 GetArgMinMaxNodeDef<ops::ArgMax>(tf_type_,
8490 /*output_dtype=*/DT_INT32);
8491 AddTestTensor("input", {1, 2, 3});
8492 AddTestTensor("dimension", {1});
8493 RunValidationAndConversion(
8494 node_def, error::UNIMPLEMENTED,
8495 "The input \"dimension\" for ArgMax must be a constant");
8496 }
8497 {
8498 // Output type is INT64, should fail.
8499 Reset();
8500 NodeDef node_def =
8501 GetArgMinMaxNodeDef<ops::ArgMax>(tf_type_,
8502 /*output_dtype=*/DT_INT64);
8503 AddTestTensor("input", {1, 2, 3});
8504 AddTestWeights("dimension", {1}, {3}, DT_INT32);
8505 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
8506 "Output type int64 is not supported");
8507 }
8508
8509 const std::vector<float> common_input = CreateVectorIota<float>(6);
8510 std::vector<ArgMinMaxTestParams> params = {
8511 {/*input_shape=*/{2, 3},
8512 /*input_value=*/common_input,
8513 /*axis=*/0,
8514 /*expected_output_dims=*/{3},
8515 /*expected_argmax_output=*/{1, 1, 1},
8516 /*expected_argmin_output=*/{0, 0, 0},
8517 trt_mode_ == TrtTestMode::kImplicitBatch
8518 ? errors::Unimplemented("TensorRT does not allow manipulation of "
8519 "the batch dimension")
8520 : Status::OK()},
8521 {
8522 /*input_shape=*/{1, 6},
8523 /*input_value=*/common_input,
8524 /*axis=*/1,
8525 /*expected_output_dims=*/{1},
8526 /*expected_argmax_output=*/{5},
8527 /*expected_argmin_output=*/{0},
8528 },
8529 {
8530 /*input_shape=*/{1, 10},
8531 /*input_value=*/
8532 {-5.0f, 3.0f, 5.0f, 1.0f, 6.0f, -9.0f, 7.0f, 1.0f, 0.0f, -1.0f},
8533 /*axis=*/-1,
8534 /*expected_output_dims=*/{1},
8535 /*expected_argmax_output=*/{6},
8536 /*expected_argmin_output=*/{5},
8537 },
8538 {
8539 /*input_shape=*/{1, 2, 3},
8540 /*input_value=*/common_input,
8541 /*axis=*/2,
8542 /*expected_output_dims=*/{1, 2},
8543 /*expected_argmax_output=*/{2, 2},
8544 /*expected_argmin_output=*/{0, 0},
8545 },
8546 {
8547 /*input_shape=*/{1, 2, 3},
8548 /*input_value=*/common_input,
8549 /*axis=*/-2,
8550 /*expected_output_dims=*/{1, 3},
8551 /*expected_argmax_output=*/{1, 1, 1},
8552 /*expected_argmin_output=*/{0, 0, 0},
8553 },
8554 {
8555 /*input_shape=*/{1, 2, 1, 3},
8556 /*input_value=*/common_input,
8557 /*axis=*/3,
8558 /*expected_output_dims=*/{1, 2, 1},
8559 /*expected_argmax_output=*/{2, 2},
8560 /*expected_argmin_output=*/{0, 0},
8561 },
8562 {
8563 /*input_shape=*/{1, 2, 1, 3},
8564 /*input_value=*/common_input,
8565 /*axis=*/-3,
8566 /*expected_output_dims=*/{1, 1, 3},
8567 /*expected_argmax_output=*/{1, 1, 1},
8568 /*expected_argmin_output=*/{0, 0, 0},
8569 },
8570 {/*input_shape=*/{1, 2, 1, 1, 3},
8571 /*input_value=*/common_input,
8572 /*axis=*/4,
8573 /*expected_output_dims=*/{1, 2, 1, 1},
8574 /*expected_argmax_output=*/{2, 2},
8575 /*expected_argmin_output=*/{0, 0},
8576 #if !IS_TRT_VERSION_GE(7, 0, 0, 11)
8577 errors::Unimplemented("op is not able to support tensors with 4+"
8578 " dimensions (excluding batch size)")
8579 #else
8580 Status::OK()
8581 #endif
8582 },
8583 {/*input_shape=*/{1, 2, 1, 1, 3},
8584 /*input_value=*/common_input,
8585 /*axis=*/-4,
8586 /*expected_output_dims=*/{1, 1, 1, 3},
8587 /*expected_argmax_output=*/{1, 1, 1},
8588 /*expected_argmin_output=*/{0, 0, 0},
8589 #if !IS_TRT_VERSION_GE(7, 0, 0, 11)
8590 errors::Unimplemented("op is not able to support tensors with 4+"
8591 " dimensions (excluding batch size)")
8592 #else
8593 Status::OK()
8594 #endif
8595 },
8596 };
8597
8598 for (auto p : params) {
8599 TestConvertArgMinMax<ops::ArgMin>(this, tf_type_, p);
8600 TestConvertArgMinMax<ops::ArgMax>(this, tf_type_, p);
8601 }
8602 }
8603
8604 // Get the NodeDef for DepthToSpace or SpaceToSpace.
8605 template <typename OpType>
GetDepthSpaceShuffleNodeDef(DataType dtype,int block_size,string data_format)8606 NodeDef GetDepthSpaceShuffleNodeDef(DataType dtype, int block_size,
8607 string data_format) {
8608 Scope s = Scope::NewRootScope();
8609 auto input = ops::Placeholder(s.WithOpName("input"), dtype);
8610 auto attrs = OpType::DataFormat(data_format);
8611 auto shuffle = OpType(s.WithOpName("my_shuffle"), input, block_size, attrs);
8612 return shuffle.operation.node()->def();
8613 }
8614
8615 struct DepthSpaceShuffleTestParams {
8616 std::vector<int> input_dims;
8617 std::vector<int> input_value;
8618 int block_size;
8619 string data_format;
8620 std::vector<int> expected_output_dims;
8621 std::vector<int> expected_output;
8622 };
8623
8624 template <typename OpType>
TestConvertDepthSpaceShuffle(ParameterizedOpConverterTestBase * test,const std::vector<DepthSpaceShuffleTestParams> & params)8625 void TestConvertDepthSpaceShuffle(
8626 ParameterizedOpConverterTestBase* test,
8627 const std::vector<DepthSpaceShuffleTestParams>& params) {
8628 Status status = Status::OK();
8629
8630 {
8631 // Input is a weight, should fail.
8632 test->Reset();
8633 NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(
8634 test->get_tf_type(), 2, "NCHW");
8635 test->AddTestWeights<float>("input", {1, 4, 1, 1}, {1, 2, 3, 4});
8636 test->RunValidationAndConversion(
8637 node_def, error::UNIMPLEMENTED,
8638 StrCat("The input \"input\" for ", node_def.op(), " must be a tensor"));
8639 }
8640 {
8641 // Input rank != 4
8642 test->Reset();
8643 NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(
8644 test->get_tf_type(), 2, "NCHW");
8645 test->AddTestTensor("input", {1, 16, 32});
8646 test->RunValidationAndConversion(
8647 node_def, error::INVALID_ARGUMENT,
8648 StrCat("The input to ", node_def.op(), " must be rank 4"));
8649 }
8650 {
8651 // Unsupported format, should fail.
8652 test->Reset();
8653 NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(
8654 test->get_tf_type(), 2, "NCHW_VECT_C");
8655 test->AddTestTensor("input", {1, 16, 32, 32});
8656 test->RunValidationAndConversion(
8657 node_def, error::UNIMPLEMENTED,
8658 "Data format NCHW_VECT_C is not supported");
8659 }
8660 if (test->get_trt_mode() != TrtTestMode::kDynamicShape) {
8661 // In dynamic shape mode, we cannot check input dimension values at
8662 // conversion time therefore we cannot confirm block_size vs input dim
8663 // consistency. We rely on the user to provide a valid TF graph. Otherwise
8664 // TRT will fail with a runtime error.
8665 if (std::is_same<OpType, ops::DepthToSpace>::value) {
8666 // Channels not divisible by block_size, should fail.
8667 test->Reset();
8668 NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::DepthToSpace>(
8669 test->get_tf_type(), 3, "NCHW");
8670 test->AddTestTensor("input", {1, 16, 32, 32});
8671 test->RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
8672 "Number of channels must be divisible by"
8673 " block_size*block_size");
8674 } else {
8675 { // Width not divisible by block_size, should fail.
8676 test->Reset();
8677 NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::SpaceToDepth>(
8678 test->get_tf_type(), 3, "NCHW");
8679 test->AddTestTensor("input", {1, 16, 9, 32});
8680 test->RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
8681 "Width and height must be divisible by"
8682 " block_size");
8683 }
8684 {
8685 // Height not divisible by block_size, should fail.
8686 test->Reset();
8687 NodeDef node_def = GetDepthSpaceShuffleNodeDef<ops::SpaceToDepth>(
8688 test->get_tf_type(), 3, "NCHW");
8689 test->AddTestTensor("input", {1, 16, 32, 9});
8690 test->RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
8691 "Width and height must be divisible by"
8692 " block_size");
8693 }
8694 }
8695 }
8696
8697 for (auto p : params) {
8698 test->Reset();
8699 NodeDef node_def = GetDepthSpaceShuffleNodeDef<OpType>(
8700 test->get_tf_type(), p.block_size, p.data_format);
8701 test->AddTestTensor("input", p.input_dims, p.input_value);
8702 test->TestOpConverter("my_shuffle", node_def, p.expected_output_dims,
8703 status, Status::OK(),
8704 ElementsAreArray(p.expected_output));
8705 }
8706 }
8707
TEST_P(OpConverter_FP32_FP16_INT32_Test,ConvertDepthToSpace)8708 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertDepthToSpace) {
8709 const std::vector<int> common_input = CreateVectorIota<int>(16);
8710 std::vector<DepthSpaceShuffleTestParams> params = {
8711 {
8712 /*input_shape=*/{1, 4, 2, 2},
8713 /*input_value=*/common_input,
8714 /*block_size=*/2,
8715 /*data_format=*/"NCHW",
8716 /*expected_output_dims=*/{1, 1, 4, 4},
8717 /*expected_output=*/
8718 {0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15},
8719 },
8720 {
8721 /*input_shape=*/{1, 2, 2, 4},
8722 /*input_value=*/common_input,
8723 /*block_size=*/2,
8724 /*data_format=*/"NHWC",
8725 /*expected_output_dims=*/{1, 4, 4, 1},
8726 /*expected_output=*/
8727 {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15},
8728 },
8729 {
8730 /*input_shape=*/{1, 16, 1, 1},
8731 /*input_value=*/common_input,
8732 /*block_size=*/4,
8733 /*data_format=*/"NCHW",
8734 /*expected_output_dims=*/{1, 1, 4, 4},
8735 /*expected_output=*/CreateVectorIota<int>(16),
8736 },
8737 {
8738 /*input_shape=*/{1, 2, 2, 8},
8739 /*input_value=*/CreateVectorIota<int>(32),
8740 /*block_size=*/2,
8741 /*data_format=*/"NHWC",
8742 /*expected_output_dims=*/{1, 4, 4, 2},
8743 /*expected_output=*/{0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6,
8744 7, 12, 13, 14, 15, 16, 17, 18, 19, 24, 25,
8745 26, 27, 20, 21, 22, 23, 28, 29, 30, 31},
8746 }};
8747
8748 TestConvertDepthSpaceShuffle<ops::DepthToSpace>(this, params);
8749 }
8750
TEST_P(OpConverter_FP32_FP16_INT32_Test,ConvertSpaceToDepth)8751 TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertSpaceToDepth) {
8752 const std::vector<int> common_input = CreateVectorIota<int>(16);
8753 std::vector<DepthSpaceShuffleTestParams> params = {
8754 {
8755 /*input_shape=*/{1, 1, 4, 4},
8756 /*input_value=*/common_input,
8757 /*block_size=*/2,
8758 /*data_format=*/"NCHW",
8759 /*expected_output_dims=*/{1, 4, 2, 2},
8760 /*expected_output=*/
8761 {0, 2, 8, 10, 1, 3, 9, 11, 4, 6, 12, 14, 5, 7, 13, 15},
8762 },
8763 {
8764 /*input_shape=*/{1, 4, 4, 1},
8765 /*input_value=*/common_input,
8766 /*block_size=*/2,
8767 /*data_format=*/"NHWC",
8768 /*expected_output_dims=*/{1, 2, 2, 4},
8769 /*expected_output=*/
8770 {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15},
8771 },
8772 {
8773 /*input_shape=*/{1, 1, 4, 4},
8774 /*input_value=*/common_input,
8775 /*block_size=*/4,
8776 /*data_format=*/"NCHW",
8777 /*expected_output_dims=*/{1, 16, 1, 1},
8778 /*expected_output=*/CreateVectorIota<int>(16),
8779 },
8780 {
8781 /*input_shape=*/{1, 4, 4, 2},
8782 /*input_value=*/CreateVectorIota<int>(32),
8783 /*block_size=*/2,
8784 /*data_format=*/"NHWC",
8785 /*expected_output_dims=*/{1, 2, 2, 8},
8786 /*expected_output=*/{0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6,
8787 7, 12, 13, 14, 15, 16, 17, 18, 19, 24, 25,
8788 26, 27, 20, 21, 22, 23, 28, 29, 30, 31},
8789 },
8790 };
8791 TestConvertDepthSpaceShuffle<ops::SpaceToDepth>(this, params);
8792 }
8793
TEST_P(OpConverter_FP32_FP16_Test,ConvertClipByValue)8794 TEST_P(OpConverter_FP32_FP16_Test, ConvertClipByValue) {
8795 Scope s = Scope::NewRootScope();
8796 auto t = ops::Placeholder(s.WithOpName("t"), tf_type_);
8797 auto clip_value_min =
8798 ops::Placeholder(s.WithOpName("clip_value_min"), tf_type_);
8799 auto clip_value_max =
8800 ops::Placeholder(s.WithOpName("clip_value_max"), tf_type_);
8801 auto clip = ops::ClipByValue(s.WithOpName("my_clip"), t, clip_value_min,
8802 clip_value_max);
8803 const NodeDef& node_def = clip.operation.node()->def();
8804
8805 nvinfer1::DataType trt_type_;
8806 TF_ASSERT_OK(TfTypeToTrtType(tf_type_, &trt_type_));
8807
8808 {
8809 // Input is a weight, should fail.
8810 Reset();
8811 AddTestWeights("t", {1, 2, 3}, {1, 2, 3, 4, 5, 6}, tf_type_);
8812 AddTestWeights("clip_value_min", {1}, {1}, tf_type_);
8813 AddTestWeights("clip_value_max", {1}, {5}, tf_type_);
8814 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
8815 "The input \"t\" for ClipByValue must be a "
8816 "tensor");
8817 }
8818 {
8819 // Clip min is a tensor, should fail.
8820 Reset();
8821 AddTestTensor("t", {1, 2, 3});
8822 AddTestTensor("clip_value_min", {1});
8823 AddTestWeights("clip_value_max", {1}, {1}, tf_type_);
8824 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
8825 "The input \"clip_value_min\" for ClipByValue "
8826 "must be a constant");
8827 }
8828 {
8829 // Clip max is a tensor, should fail.
8830 Reset();
8831 AddTestTensor("t", {1, 2, 3});
8832 AddTestWeights("clip_value_min", {1}, {1}, tf_type_);
8833 AddTestTensor("clip_value_max", {1});
8834 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
8835 "The input \"clip_value_max\" for ClipByValue "
8836 "must be a constant");
8837 }
8838
8839 struct TestParams {
8840 std::vector<int> dims;
8841 int clip_value_min;
8842 int clip_value_max;
8843 std::vector<float> expected_output;
8844 };
8845
8846 const std::vector<float> common_input = CreateVectorIota<float>(6);
8847
8848 std::vector<TestParams> params = {{
8849 /*dims=*/{6},
8850 /*clip_value_min=*/2,
8851 /*clip_value_max=*/4,
8852 /*expected_output=*/{2, 2, 2, 3, 4, 4},
8853 },
8854 {
8855 /*dims=*/{1, 6},
8856 /*clip_value_min=*/2,
8857 /*clip_value_max=*/4,
8858 /*expected_output=*/{2, 2, 2, 3, 4, 4},
8859 },
8860 {
8861 /*dims=*/{1, 2, 3},
8862 /*clip_value_min=*/2,
8863 /*clip_value_max=*/4,
8864 /*expected_output=*/{2, 2, 2, 3, 4, 4},
8865 },
8866 {
8867 /*dims=*/{1, 2, 3, 1},
8868 /*clip_value_min=*/2,
8869 /*clip_value_max=*/4,
8870 /*expected_output=*/{2, 2, 2, 3, 4, 4},
8871 },
8872 {
8873 /*dims=*/{1, 1, 3, 1, 2},
8874 /*clip_value_min=*/2,
8875 /*clip_value_max=*/4,
8876 /*expected_output=*/{2, 2, 2, 3, 4, 4},
8877 },
8878 {
8879 /*dims=*/{1, 1, 3, 1, 2, 1},
8880 /*clip_value_min=*/2,
8881 /*clip_value_max=*/4,
8882 /*expected_output=*/{2, 2, 2, 3, 4, 4},
8883 },
8884 {
8885 /*dims=*/{2, 1, 3},
8886 /*clip_value_min=*/-1,
8887 /*clip_value_max=*/8,
8888 /*expected_output=*/common_input,
8889 }};
8890
8891 for (auto p : params) {
8892 Reset();
8893
8894 AddTestTensor("t", p.dims, tf_type_, common_input);
8895 AddTestWeights("clip_value_min", {1}, {p.clip_value_min}, tf_type_);
8896 AddTestWeights("clip_value_max", {1}, {p.clip_value_max}, tf_type_);
8897
8898 TestOpConverter("my_clip", node_def, p.dims,
8899 /*expected_conversion_status=*/Status::OK(),
8900 /*expected_runtime_status=*/Status::OK(),
8901 /*matcher=*/ElementsAreArray(p.expected_output));
8902 }
8903 }
8904
8905 // Get the NodeDef for SquaredDifference.
GetSquaredDifferenceNodeDef(DataType dtype)8906 NodeDef GetSquaredDifferenceNodeDef(DataType dtype) {
8907 Scope s = Scope::NewRootScope();
8908 auto x = ops::Placeholder(s.WithOpName("x"), dtype);
8909 auto y = ops::Placeholder(s.WithOpName("y"), dtype);
8910 auto squared_diff =
8911 ops::SquaredDifference(s.WithOpName("my_squared_diff"), x, y);
8912 return squared_diff.operation.node()->def();
8913 }
8914
TEST_P(OpConverter_FP32_FP16_Test,ConvertSquaredDifference)8915 TEST_P(OpConverter_FP32_FP16_Test, ConvertSquaredDifference) {
8916 {
8917 // Input is a weight, should fail.
8918 Reset();
8919 NodeDef node_def = GetSquaredDifferenceNodeDef(tf_type_);
8920 AddTestWeights<float>("x", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
8921 AddTestTensor("y", {1, 1, 2, 3});
8922 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
8923 "The input \"x\" for SquaredDifference must be "
8924 "a tensor");
8925 }
8926
8927 struct TestParams {
8928 std::vector<int> dims_x;
8929 std::vector<int> dims_y;
8930 std::vector<float> value_x;
8931 std::vector<float> value_y;
8932 std::vector<int> expected_output_dims;
8933 std::vector<float> expected_output;
8934 Status status;
8935 Status runtime_status;
8936 };
8937
8938 const std::vector<float> common_input = CreateVectorIota<float>(6);
8939 std::vector<TestParams> params = {
8940 {/*dims_x=*/{1, 2, 3},
8941 /*dims_y=*/{1, 7, 5},
8942 /*value_x=*/common_input,
8943 /*value_y=*/std::vector<float>(7 * 5, 0),
8944 /*expected_output_dims=*/{1, 1, 2, 3},
8945 /*expected_output=*/common_input,
8946 trt_mode_ == TrtTestMode::kDynamicShape
8947 ? Status::OK()
8948 : errors::InvalidArgument("Infeasible broadcast scheme"),
8949 errors::Internal(
8950 "Binding index out of range. This can happen if profile is not set, "
8951 "or the network is invalid for the current profile.")},
8952 {
8953 /*dims_x=*/{1, 1, 2, 3},
8954 /*dims_y=*/{1, 1, 2, 3},
8955 /*value_x=*/common_input,
8956 /*value_y=*/{0, -1, 3, 0, 10, -7},
8957 /*expected_output_dims=*/{1, 1, 2, 3},
8958 /*expected_output=*/{0, 4, 1, 9, 36, 144},
8959 },
8960 {
8961 /*dims_x=*/{1, 1, 2, 3},
8962 /*dims_y=*/{1, 1, 1, 3},
8963 /*value_x=*/common_input,
8964 /*value_y=*/{0, 1, 2},
8965 /*expected_output_dims=*/{1, 1, 2, 3},
8966 /*expected_output=*/{0, 0, 0, 9, 9, 9},
8967 },
8968 };
8969
8970 for (auto p : params) {
8971 Reset();
8972 NodeDef node_def = GetSquaredDifferenceNodeDef(tf_type_);
8973 AddTestTensor("x", p.dims_x, p.value_x);
8974 AddTestTensor("y", p.dims_y, p.value_y);
8975 TestOpConverter("my_squared_diff", node_def, p.expected_output_dims,
8976 p.status, p.runtime_status,
8977 ElementsAreArray(p.expected_output));
8978 }
8979 }
8980
8981 template <typename OpType>
MakeResizeNodeDef(DataType dtype,bool align_corners)8982 NodeDef MakeResizeNodeDef(DataType dtype, bool align_corners) {
8983 Scope s = Scope::NewRootScope();
8984 auto input = ops::Placeholder(s.WithOpName("input"), dtype);
8985 auto size = ops::Placeholder(s.WithOpName("size"), DT_INT32);
8986 auto attrs = typename OpType::Attrs().AlignCorners(align_corners);
8987 auto resize = OpType(s.WithOpName("my_resize"), input, size, attrs);
8988 return resize.operation.node()->def();
8989 }
8990
8991 struct ResizeTestParams {
8992 std::vector<int> input_dims;
8993 std::vector<int> output_resize_dims;
8994 std::vector<float> input_value;
8995 bool size_as_tensor;
8996 bool align_corners;
8997 std::vector<int> expected_output_dims;
8998 std::vector<float> expected_nearest_output_values;
8999 std::vector<float> expected_bilinear_output_values;
9000 Status status;
9001 };
9002
9003 template <typename OpType>
TestConvertResize(ParameterizedOpConverterTestBase * test,ResizeTestParams & p)9004 void TestConvertResize(ParameterizedOpConverterTestBase* test,
9005 ResizeTestParams& p) {
9006 test->Reset();
9007 // Create resize node.
9008 NodeDef node_def =
9009 MakeResizeNodeDef<OpType>(test->get_tf_type(), p.align_corners);
9010
9011 test->AddTestTensor("input", p.input_dims, test->get_tf_type(),
9012 p.input_value);
9013 // Create output size.
9014 if (p.size_as_tensor) {
9015 std::vector<int32> size_dims{2};
9016 std::vector<int32> size_values{p.output_resize_dims};
9017 test->AddTestTensor("size", size_dims, DT_INT32, size_values, size_dims);
9018 } else {
9019 test->AddTestWeights("size", {2}, p.output_resize_dims, DT_INT32);
9020 }
9021
9022 std::vector<float> expected_out;
9023
9024 if (node_def.op() == "ResizeBilinear") {
9025 expected_out = p.expected_bilinear_output_values;
9026 } else if (node_def.op() == "ResizeNearestNeighbor") {
9027 expected_out = p.expected_nearest_output_values;
9028 } else {
9029 ASSERT_TRUE(false);
9030 }
9031
9032 test->TestOpConverter("my_resize", node_def, p.expected_output_dims,
9033 /*expected_conversion_status=*/p.status,
9034 /*expected_runtime_status=*/p.status,
9035 /*matcher=*/ElementsAreArray(expected_out),
9036 /*out_tf_types=*/{DT_FLOAT});
9037 }
9038
TEST_P(OpConverter_FP32_FP16_Test,ConvertResize)9039 TEST_P(OpConverter_FP32_FP16_Test, ConvertResize) {
9040 {
9041 // First input is weight, should fail.
9042 Reset();
9043 NodeDef node_def = MakeResizeNodeDef<ops::ResizeBilinear>(tf_type_,
9044 /*align_corners=*/
9045 true);
9046 AddTestWeights<float>("input", {1, 2}, {1, 2});
9047 AddTestWeights<int>("size", {1, 2}, {1, 2});
9048 RunValidationAndConversion(
9049 node_def, error::UNIMPLEMENTED,
9050 "The input \"input\" for ResizeBilinear must be a "
9051 "tensor");
9052 }
9053
9054 std::vector<ResizeTestParams> params{
9055 {/*input_dims=*/{1, 1, 2, 1}, // N, H, W, C
9056 /*output_resize_dims=*/{2, 3}, // H_out, W_out
9057 /*input_values=*/{2.0f, -1.0f},
9058 /*size_as_tensor=*/false,
9059 /*align_corners=*/false,
9060 /*expected_output_dims=*/{1, 2, 3, 1}, // N, H, W, C
9061 /*expected_nearest_output_values=*/
9062 {2.0f, 2.0f, -1.0f, 2.0f, 2.0f, -1.0f},
9063 /*expected_bilinear_output_values=*/
9064 {2.0f, 0.f, -1.0f, 2.0f, 0.f, -1.0f},
9065 /*status=*/Status::OK()},
9066 {/*input_dims=*/{1, 1, 2, 1}, // N, H, W, C
9067 /*output_resize_dims=*/{2, 3}, // H_out, W_out
9068 /*input_values=*/{2.0f, -1.0f},
9069 /*size_as_tensor=*/false,
9070 /*align_corners=*/true,
9071 /*expected_output_dims=*/{1, 2, 3, 1}, // N, H, W, C
9072 /*expected_nearest_output_values=*/
9073 {2.0f, 2.0f, -1.0f, 2.0f, 2.0f, -1.0f},
9074 /*expected_bilinear_output_values=*/
9075 {2.0f, 0.5f, -1.0f, 2.0f, 0.5f, -1.0f},
9076 /*status=*/Status::OK()}};
9077
9078 if (trt_mode_ != TrtTestMode::kImplicitBatch) {
9079 // Size as a tensor is not supported in implicit batch mode.
9080 params.push_back({/*input_dims=*/{1, 1, 2, 1}, // N, H, W, C
9081 /*output_resize_dims=*/{2, 3}, // H_out, W_out
9082 /*input_values=*/{2.0f, -1.0f},
9083 /*size_as_tensor=*/true,
9084 /*align_corners=*/true,
9085 /*expected_output_dims=*/{1, 2, 3, 1}, // N, H, W, C
9086 /*expected_nearest_output_values=*/
9087 {2.0f, 2.0f, -1.0f, 2.0f, 2.0f, -1.0f},
9088 /*expected_bilinear_output_values=*/
9089 {2.0f, 0.5f, -1.0f, 2.0f, 0.5f, -1.0f},
9090 /*status=*/Status::OK()});
9091 }
9092
9093 for (auto p : params) {
9094 TestConvertResize<ops::ResizeNearestNeighbor>(this, p);
9095
9096 // This use case is not supported as of TRT version 7.1
9097 #if IS_TRT_VERSION_GE(7, 1, 0, 0)
9098 if (!p.align_corners) {
9099 p.status = errors::InvalidArgument(
9100 "Cannot Convert Bilinear Resize when align_corners=False");
9101 }
9102 #endif
9103
9104 TestConvertResize<ops::ResizeBilinear>(this, p);
9105 }
9106 }
9107
MakePadNodeDef(std::string name,DataType dtype)9108 NodeDef MakePadNodeDef(std::string name, DataType dtype) {
9109 Scope s = Scope::NewRootScope();
9110 auto input = ops::Placeholder(s.WithOpName("input"), dtype);
9111 auto padding = ops::Placeholder(s.WithOpName("padding"), DT_INT32);
9112 auto pad = ops::Pad(s.WithOpName(name), input, padding);
9113 return pad.operation.node()->def();
9114 }
9115
9116 struct PadTestParams {
9117 std::vector<int> input_dims;
9118 std::vector<int> pad_dims;
9119 std::vector<int> pad_values;
9120 std::vector<float> input_values;
9121 std::vector<int> expected_output_dims;
9122 std::vector<float> expected_output_values;
9123 Status status;
9124 };
9125
TEST_P(OpConverter_FP32_FP16_Test,ConvertPad)9126 TEST_P(OpConverter_FP32_FP16_Test, ConvertPad) {
9127 {
9128 // First input is weight, should fail.
9129 Reset();
9130 NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
9131 AddTestWeights("input", {1, 2}, {1, 2}, tf_type_);
9132 AddTestWeights<int>("padding", {1, 2}, {1, 2});
9133 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
9134 "The input \"tensor\" for Pad must be a "
9135 "tensor");
9136 }
9137 {
9138 // padding is a tensor, should fail.
9139 Reset();
9140 NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
9141 AddTestTensor("input", {1, 2});
9142 AddTestTensor("padding", {1, 2});
9143 RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
9144 "The input \"paddings\" for Pad must be a "
9145 "constant");
9146 }
9147 {
9148 // Make sure that ranges are inferred across a Pad.
9149 Reset();
9150 NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
9151 AddTestTensor("input", {1, 1, 2, 1});
9152 AddTestWeights<int>("padding", {4, 2}, {0, 0, 1, 0, 0, 1, 0, 0});
9153 TRT_TensorOrWeights input;
9154 TRT_TensorOrWeights output;
9155 RunValidationAndConversion(node_def);
9156 TF_EXPECT_OK(GetTensorOrWeights("input", &input));
9157 TF_EXPECT_OK(GetTensorOrWeights("my_pad", &output));
9158 ITensorProxyPtr input_tensor = input.tensor();
9159 converter_->ProvideQuantizationRange(&input_tensor, -5.0f, 5.0f);
9160 auto ranges = quantization_ranges();
9161 EXPECT_EQ(5.0f, ranges[input.tensor()->trt_tensor()]);
9162 }
9163
9164 std::vector<PadTestParams> params{
9165 // 1 padding dim
9166 {
9167 /*input_dims=*/{1, 1, 3, 2}, // N, H, W, C
9168 /*pad_dims=*/{4, 2}, // #dims, {pad_before, pad_after}
9169 /*pad_values*/ {0, 0, 0, 0, 0, 1, 0, 0},
9170 /*input_values=*/{1, 2, 3, 4, 5, 6},
9171 /*expected_output_dims=*/{1, 1, 4, 2}, // N, H, W, C
9172 /*expected_output_values=*/
9173 {1, 2, 3, 4, 5, 6, 0, 0},
9174 },
9175 {
9176 /*input_dims=*/{1, 1, 3, 2}, // N, H, W, C
9177 /*pad_dims=*/{4, 2}, // #dims, {pad_before, pad_after}
9178 /*pad_values*/ {0, 0, 0, 0, 0, 0, 0, 1},
9179 /*input_values=*/{1, 2, 3, 4, 5, 6},
9180 /*expected_output_dims=*/{1, 1, 3, 3}, // N, H, W, C
9181 /*expected_output_values=*/
9182 {1, 2, 0, 3, 4, 0, 5, 6, 0},
9183 },
9184 {
9185 /*input_dims=*/{1, 1, 3, 2}, // N, H, W, C
9186 /*pad_dims=*/{4, 2}, // #dims, {pad_before, pad_after}
9187 /*pad_values*/ {0, 0, 1, 0, 0, 0, 0, 0},
9188 /*input_values=*/{1, 2, 3, 4, 5, 6},
9189 /*expected_output_dims=*/{1, 2, 3, 2}, // N, H, W, C
9190 /*expected_output_values=*/
9191 {0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6},
9192 },
9193 // 2 padding dims
9194 {
9195 /*input_dims=*/{1, 1, 2, 1}, // N, H, W, C
9196 /*pad_dims=*/{4, 2}, // #dims, {pad_before, pad_after}
9197 /*pad_values*/ {0, 0, 1, 0, 0, 1, 0, 0},
9198 /*input_values=*/{2.0f, -1.0f},
9199 /*expected_output_dims=*/{1, 2, 3, 1}, // N, H, W, C
9200 /*expected_output_values=*/
9201 {0.0, 0.0, 0.0, 2.0f, -1.0f, 0.0},
9202 },
9203 PadTestParams{
9204 /*input_dims=*/{1, 1, 2, 2}, // N, H, W, C
9205 /*pad_dims=*/{4, 2}, // #dims, {pad_before, pad_after}
9206 /*pad_values*/ {0, 0, 1, 0, 0, 1, 0, 0},
9207 /*input_values=*/{2, -1, 3., 4},
9208 /*expected_output_dims=*/{1, 2, 3, 2}, // N, H, W, C
9209 /*expected_output_values=*/
9210 {0, 0, 0, 0, 0, 0, 2, -1, 3, 4, 0, 0},
9211 },
9212 PadTestParams{
9213 /*input_dims=*/{1, 1, 2, 1, 2}, // N, C, H, W, D
9214 /*pad_dims=*/{5, 2}, // #dims, {pad_before, pad_after}
9215 /*pad_values*/ {0, 0, 1, 0, 0, 1, 0, 0, 0, 0},
9216 /*input_values=*/{2, -1, 3., 4},
9217 /*expected_output_dims=*/{1, 2, 3, 1, 2}, // N, H, W, C
9218 /*expected_output_values=*/
9219 {0, 0, 0, 0, 0, 0, 2, -1, 3, 4, 0, 0},
9220 },
9221 PadTestParams{
9222 /*input_dims=*/{1, 1, 2, 1, 2}, // N, C, H, W, D
9223 /*pad_dims=*/{5, 2}, // #dims, {pad_before, pad_after}
9224 /*pad_values*/ {0, 0, 0, 1, 0, 0, 1, 1, 0, 0},
9225 /*input_values=*/{2, -1, 3., 4},
9226 /*expected_output_dims=*/{1, 2, 2, 3, 2}, // N, H, W, C
9227 /*expected_output_values=*/
9228 {0., 0., 2., -1., 0., 0., 0., 0., 3., 4., 0., 0.,
9229 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0},
9230 },
9231 PadTestParams{
9232 /*input_dims=*/{1, 1, 2, 1}, // N, H, W, C
9233 /*pad_dims=*/{4, 2}, // #dims, {pad_before, pad_after}
9234 /*pad_values*/ {1, 0, 0, 0, 0, 1, 0, 0},
9235 /*input_values=*/{2.0f, -1.0f},
9236 /*expected_output_dims=*/{2, 1, 3, 1}, // N, H, W, C
9237 /*expected_output_values=*/{0.0, 0.0, 0.0, 2.0f, -1.0f, 0.0},
9238 trt_mode_ == TrtTestMode::kImplicitBatch
9239 ? errors::InvalidArgument("Padding layer does not support "
9240 "padding on batch dimension")
9241 : Status::OK()},
9242 PadTestParams{
9243 /*input_dims=*/{1, 1, 2, 1}, // N, H, W, C
9244 /*pad_dims=*/{4, 2}, // #dims, {pad_before, pad_after}
9245 /*pad_values*/ {0, 0, 1, 0, 0, 1, 1, 1},
9246 /*input_values=*/{2.0f, -1.0f},
9247 /*expected_output_dims=*/{}, // N, H, W, C
9248 /*expected_output_values=*/{},
9249 errors::InvalidArgument("Padding layer does not support padding on "
9250 "> 2")},
9251 PadTestParams{
9252 /*input_dims=*/{1, 2, 2}, // N, H, W
9253 /*pad_dims=*/{3, 2}, // #dims, {pad_before, pad_after}
9254 /*pad_values*/ {0, 0, 1, 0, 0, 1},
9255 /*input_values=*/{2, -1, 3., 4},
9256 /*expected_output_dims=*/{1, 3, 3}, // N, H, W, C
9257 /*expected_output_values=*/
9258 {0., 0., 0., 2., -1., 0., 3., 4., 0.},
9259 errors::InvalidArgument("Convertpad requires at least 4D input")}};
9260
9261 for (auto p : params) {
9262 Reset();
9263 // Create pad node.
9264 NodeDef node_def = MakePadNodeDef("my_pad", tf_type_);
9265 // Create input tensor.
9266 AddTestTensor("input", p.input_dims, p.input_values);
9267 // Create output size.
9268 AddTestWeights<int32>("padding", p.pad_dims, p.pad_values);
9269 TestOpConverter("my_pad", node_def, p.expected_output_dims, p.status,
9270 p.status, ElementsAreArray(p.expected_output_values));
9271 }
9272 }
9273 } // namespace convert
9274 } // namespace tensorrt
9275 } // namespace tensorflow
9276
9277 #endif // GOOGLE_CUDA && GOOGLE_TENSORRT
9278