/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <string>
#include <vector>

#include "absl/algorithm/container.h"
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/cc/ops/image_ops.h"
#include "tensorflow/cc/ops/nn_ops.h"
#include "tensorflow/cc/ops/nn_ops_internal.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/kernels/conv_ops_gpu.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/tensor_float_32_utils.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/protobuf/rewriter_config.pb.h"
#include "tensorflow/core/public/session.h"

namespace tensorflow {

class FusedResizePadConvOpTest : public OpsTestBase {
 protected:
  template <typename T>
  void HandwrittenConv(DataType dtype) {
    const int stride = 1;
    TF_EXPECT_OK(NodeDefBuilder("fused_resize_op", "FusedResizeAndPadConv2D")
                     .Input(FakeInput(dtype))
                     .Input(FakeInput(DT_INT32))
                     .Input(FakeInput(DT_INT32))
                     .Input(FakeInput(dtype))
                     .Attr("T", dtype)
                     .Attr("resize_align_corners", false)
                     .Attr("mode", "REFLECT")
                     .Attr("strides", {1, stride, stride, 1})
                     .Attr("padding", "SAME")
                     .Finalize(node_def()));
    TF_EXPECT_OK(InitOp());
    const int depth = 1;
    const int image_width = 4;
    const int image_height = 3;
    const int image_batch_count = 1;
    // The image matrix is:
    // |  1 |  2 |  3 |  4 |
    // |  5 |  6 |  7 |  8 |
    // |  9 | 10 | 11 | 12 |
    Tensor image(dtype, {image_batch_count, image_height, image_width, depth});
    test::FillValues<T>(&image, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});

    // The filter matrix is:
    // | 1 | 4 | 7 |
    // | 2 | 5 | 8 |
    // | 3 | 6 | 9 |
    const int filter_size = 3;
    const int filter_count = 1;
    Tensor filter(dtype, {filter_size, filter_size, depth, filter_count});
    test::FillValues<T>(&filter, {1, 4, 7, 2, 5, 8, 3, 6, 9});

    const int resized_width = image_width;
    const int resized_height = image_height;

    const int top_padding = 0;
    const int bottom_padding = 0;
    const int left_padding = 0;
    const int right_padding = 0;
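    // With the resize target equal to the input size and zero padding on
    // every side, the fused op reduces to a plain 'SAME' convolution, so the
    // handwritten expectations below apply unchanged.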

    AddInputFromArray<T>(image.shape(), image.flat<T>());
    AddInputFromArray<int32>(TensorShape({2}), {resized_height, resized_width});
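    // Paddings are a [4, 2] tensor of (before, after) pairs for each of the
    // batch, height, width and channel dimensions.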
    AddInputFromArray<int32>(
        TensorShape({4, 2}),
        {0, 0, top_padding, bottom_padding, left_padding, right_padding, 0, 0});
    AddInputFromArray<T>(filter.shape(), filter.flat<T>());
    TF_ASSERT_OK(RunOpKernel());

    // We're sliding the 3x3 filter across the 3x4 image, with accesses outside
    // the input set to zero because we're using the 'SAME' padding mode.
    // The calculations behind the expected output are:
    // (1*0)+(4*0)+(7*0)+(2*0)+(5*1)+(8*2)+(3*0)+(6*5)+(9*6)=105
    // (1*0)+(4*0)+(7*0)+(2*1)+(5*2)+(8*3)+(3*5)+(6*6)+(9*7)=150
    // (1*0)+(4*0)+(7*0)+(2*2)+(5*3)+(8*4)+(3*6)+(6*7)+(9*8)=183
    // (1*0)+(4*0)+(7*0)+(2*3)+(5*4)+(8*0)+(3*7)+(6*8)+(9*0)=95
    // (1*0)+(4*1)+(7*2)+(2*0)+(5*5)+(8*6)+(3*0)+(6*9)+(9*10)=235
    // (1*1)+(4*2)+(7*3)+(2*5)+(5*6)+(8*7)+(3*9)+(6*10)+(9*11)=312
    // (1*2)+(4*3)+(7*4)+(2*6)+(5*7)+(8*8)+(3*10)+(6*11)+(9*12)=357
    // (1*3)+(4*4)+(7*0)+(2*7)+(5*8)+(8*0)+(3*11)+(6*12)+(9*0)=178
    // (1*0)+(4*5)+(7*6)+(2*0)+(5*9)+(8*10)+(3*0)+(6*0)+(9*0)=187
    // (1*5)+(4*6)+(7*7)+(2*9)+(5*10)+(8*11)+(3*0)+(6*0)+(9*0)=234
    // (1*6)+(4*7)+(7*8)+(2*10)+(5*11)+(8*12)+(3*0)+(6*0)+(9*0)=261
    // (1*7)+(4*8)+(7*0)+(2*11)+(5*12)+(8*0)+(3*0)+(6*0)+(9*0)=121
    // This means we should end up with this matrix:
    // |  105  |  150  |  183  |   95  |
    // |  235  |  312  |  357  |  178  |
    // |  187  |  234  |  261  |  121  |
    const int expected_width = image_width;
    const int expected_height = image_height * filter_count;
    Tensor expected(dtype, TensorShape({image_batch_count, expected_height,
                                        expected_width, filter_count}));
    test::FillValues<T>(
        &expected, {105, 150, 183, 95, 235, 312, 357, 178, 187, 234, 261, 121});
    const Tensor& output = *GetOutput(0);
    test::ExpectTensorNear<T>(expected, output, 1e-5);
  }

  template <typename T>
  void CompareFusedAndSeparate(int input_width, int input_height,
                               int input_depth, int resize_width,
                               int resize_height, int y_padding, int x_padding,
                               int filter_size, int filter_count,
                               bool resize_align_corners,
                               const string& pad_mode, int stride,
                               const string& padding, DataType dtype) {
    Scope root = tensorflow::Scope::NewRootScope();
    using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)

    Tensor input_data(DT_FLOAT,
                      TensorShape({1, input_height, input_width, input_depth}));
    test::FillIota<float>(&input_data, 1.0f);
    Output input =
        Const(root.WithOpName("input"), Input::Initializer(input_data));
    Output casted_input = Cast(root.WithOpName("casted_input"), input, dtype);

    Tensor filter_data(DT_FLOAT, TensorShape({filter_size, filter_size,
                                              input_depth, filter_count}));
    test::FillIota<float>(&filter_data, 1.0f);
    Output filter =
        Const(root.WithOpName("filter"), Input::Initializer(filter_data));
    Output casted_filter =
        Cast(root.WithOpName("casted_filter"), filter, dtype);

    Output resize_size =
        Const(root.WithOpName("resize_size"), {resize_height, resize_width});
    Output resize =
        ResizeBilinear(root.WithOpName("resize"), input, resize_size,
                       ResizeBilinear::AlignCorners(resize_align_corners));
    // Bilinear resize only outputs float, so cast the result back to dtype to
    // match the input.
    Output casted_resize = Cast(root.WithOpName("cast"), resize, dtype);
    Output paddings =
        Const(root.WithOpName("paddings"),
              {{0, 0}, {y_padding, y_padding}, {x_padding, x_padding}, {0, 0}});
    Output mirror_pad = MirrorPad(root.WithOpName("mirror_pad"), casted_resize,
                                  paddings, pad_mode);
    Output conv = Conv2D(root.WithOpName("conv"), mirror_pad, casted_filter,
                         {1, stride, stride, 1}, padding);

    Output fused_conv = FusedResizeAndPadConv2D(
        root.WithOpName("fused_conv"), casted_input, resize_size, paddings,
        casted_filter, pad_mode, {1, stride, stride, 1}, padding,
        FusedResizeAndPadConv2D::ResizeAlignCorners(resize_align_corners));

    tensorflow::GraphDef graph;
    TF_ASSERT_OK(root.ToGraphDef(&graph));

    std::unique_ptr<tensorflow::Session> session(
        tensorflow::NewSession(tensorflow::SessionOptions()));
    TF_ASSERT_OK(session->Create(graph));

    std::vector<Tensor> unfused_tensors;
    TF_ASSERT_OK(session->Run({}, {"conv"}, {}, &unfused_tensors));

    std::vector<Tensor> fused_tensors;
    TF_ASSERT_OK(session->Run({}, {"fused_conv"}, {}, &fused_tensors));

    test::ExpectClose(unfused_tensors[0], fused_tensors[0]);
  }

  template <typename T>
  void CompareFusedPadOnlyAndSeparate(int input_width, int input_height,
                                      int input_depth, int y_padding,
                                      int x_padding, int filter_size,
                                      int filter_count, const string& pad_mode,
                                      int stride, const string& padding,
                                      DataType dtype) {
    Scope root = tensorflow::Scope::NewRootScope();
    using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)

    Tensor input_data(DT_FLOAT,
                      TensorShape({1, input_height, input_width, input_depth}));
    test::FillIota<float>(&input_data, 1.0f);
    Output input =
        Const(root.WithOpName("input"), Input::Initializer(input_data));
    Output casted_input = Cast(root.WithOpName("casted_input"), input, dtype);

    Tensor filter_data(DT_FLOAT, TensorShape({filter_size, filter_size,
                                              input_depth, filter_count}));
    test::FillIota<float>(&filter_data, 1.0f);
    Output filter =
        Const(root.WithOpName("filter"), Input::Initializer(filter_data));
    Output casted_filter =
        Cast(root.WithOpName("casted_filter"), filter, dtype);

    Output paddings =
        Const(root.WithOpName("paddings"),
              {{0, 0}, {y_padding, y_padding}, {x_padding, x_padding}, {0, 0}});
    Output mirror_pad = MirrorPad(root.WithOpName("mirror_pad"), casted_input,
                                  paddings, pad_mode);
    Output conv = Conv2D(root.WithOpName("conv"), mirror_pad, casted_filter,
                         {1, stride, stride, 1}, padding);

    Output fused_conv = FusedPadConv2D(
        root.WithOpName("fused_conv"), casted_input, paddings, casted_filter,
        pad_mode, {1, stride, stride, 1}, padding);

    tensorflow::GraphDef graph;
    TF_ASSERT_OK(root.ToGraphDef(&graph));

    std::unique_ptr<tensorflow::Session> session(
        tensorflow::NewSession(tensorflow::SessionOptions()));
    TF_ASSERT_OK(session->Create(graph));

    std::vector<Tensor> unfused_tensors;
    TF_ASSERT_OK(session->Run({}, {"conv"}, {}, &unfused_tensors));

    std::vector<Tensor> fused_tensors;
    TF_ASSERT_OK(session->Run({}, {"fused_conv"}, {}, &fused_tensors));

    test::ExpectClose(unfused_tensors[0], fused_tensors[0]);
  }
};

TEST_F(FusedResizePadConvOpTest, HandwrittenConvHalf) {
  HandwrittenConv<Eigen::half>(DT_HALF);
}

TEST_F(FusedResizePadConvOpTest, HandwrittenConvFloat) {
  HandwrittenConv<float>(DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, HandwrittenConvDouble) {
  HandwrittenConv<double>(DT_DOUBLE);
}

TEST_F(FusedResizePadConvOpTest, IdentityComparativeHalf) {
  CompareFusedAndSeparate<Eigen::half>(10, 10, 1, 10, 10, 0, 0, 1, 1, false,
                                       "REFLECT", 1, "SAME", DT_HALF);
}

TEST_F(FusedResizePadConvOpTest, IdentityComparativeFloat) {
  CompareFusedAndSeparate<float>(10, 10, 1, 10, 10, 0, 0, 1, 1, false,
                                 "REFLECT", 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, IdentityComparativeDouble) {
  CompareFusedAndSeparate<double>(10, 10, 1, 10, 10, 0, 0, 1, 1, false,
                                  "REFLECT", 1, "SAME", DT_DOUBLE);
}

TEST_F(FusedResizePadConvOpTest, ConvOnlyComparative) {
  CompareFusedAndSeparate<float>(10, 10, 3, 10, 10, 0, 0, 4, 4, false,
                                 "REFLECT", 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeOnlyComparative) {
  CompareFusedAndSeparate<float>(10, 10, 1, 20, 20, 0, 0, 1, 1, false,
                                 "REFLECT", 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAndConvComparative) {
  CompareFusedAndSeparate<float>(2, 2, 4, 4, 2, 0, 0, 2, 2, false, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAlignAndConvComparative) {
  CompareFusedAndSeparate<float>(2, 2, 4, 4, 2, 0, 0, 2, 2, true, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAndConvStridedComparative) {
  CompareFusedAndSeparate<float>(2, 2, 4, 4, 2, 0, 0, 2, 2, false, "REFLECT", 2,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAlignAndConvValidComparative) {
  CompareFusedAndSeparate<float>(2, 2, 4, 4, 2, 0, 0, 2, 2, true, "REFLECT", 1,
                                 "VALID", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, PadOnlyComparative) {
  CompareFusedAndSeparate<float>(4, 4, 1, 4, 4, 2, 2, 1, 1, false, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, PadOnlyWithChannelsComparative) {
  CompareFusedAndSeparate<float>(4, 4, 3, 4, 4, 2, 2, 1, 1, false, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAndPadComparative) {
  CompareFusedAndSeparate<float>(4, 4, 1, 6, 6, 2, 2, 1, 1, false, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, PadOnlySymmetricComparative) {
  CompareFusedAndSeparate<float>(4, 4, 1, 4, 4, 2, 2, 1, 1, false, "SYMMETRIC",
                                 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAndPadSymmetricComparative) {
  CompareFusedAndSeparate<float>(4, 4, 3, 6, 6, 2, 2, 1, 1, false, "SYMMETRIC",
                                 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAndPadSymmetricComparativeLarge) {
  CompareFusedAndSeparate<float>(1000, 1000, 3, 1006, 1006, 2, 2, 1, 1, false,
                                 "SYMMETRIC", 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, NoResizeIdentityComparativeHalf) {
  CompareFusedPadOnlyAndSeparate<Eigen::half>(10, 10, 1, 0, 0, 1, 1, "REFLECT",
                                              1, "SAME", DT_HALF);
}

TEST_F(FusedResizePadConvOpTest, NoResizeIdentityComparativeFloat) {
  CompareFusedPadOnlyAndSeparate<float>(10, 10, 1, 0, 0, 1, 1, "REFLECT", 1,
                                        "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, NoResizeIdentityComparativeDouble) {
  CompareFusedPadOnlyAndSeparate<double>(10, 10, 1, 0, 0, 1, 1, "REFLECT", 1,
                                         "SAME", DT_DOUBLE);
}

TEST_F(FusedResizePadConvOpTest, NoResizeConvOnlyComparative) {
  CompareFusedPadOnlyAndSeparate<float>(10, 10, 3, 0, 0, 4, 4, "REFLECT", 1,
                                        "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, NoResizePadOnlyComparative) {
  CompareFusedPadOnlyAndSeparate<float>(4, 4, 1, 2, 2, 1, 1, "REFLECT", 1,
                                        "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, NoResizePadOnlyWithChannelsComparative) {
  CompareFusedPadOnlyAndSeparate<float>(4, 4, 3, 2, 2, 1, 1, "REFLECT", 1,
                                        "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, NoResizePadOnlySymmetricComparative) {
  CompareFusedPadOnlyAndSeparate<float>(4, 4, 1, 2, 2, 1, 1, "SYMMETRIC", 1,
                                        "SAME", DT_FLOAT);
}

class ConvOpTest : public OpsTestBase {
 protected:
  void HandwrittenConv() {
    const int stride = 1;
    TF_EXPECT_OK(NodeDefBuilder("conv_op", "Conv2D")
                     .Input(FakeInput(DT_FLOAT))
                     .Input(FakeInput(DT_FLOAT))
                     .Attr("T", DT_FLOAT)
                     .Attr("strides", {1, stride, stride, 1})
                     .Attr("padding", "SAME")
                     .Finalize(node_def()));
    TF_EXPECT_OK(InitOp());
    const int depth = 1;
    const int image_width = 4;
    const int image_height = 3;
    const int image_batch_count = 1;
    // The image matrix is:
    // |  1 |  2 |  3 |  4 |
    // |  5 |  6 |  7 |  8 |
    // |  9 | 10 | 11 | 12 |
    Tensor image(DT_FLOAT,
                 {image_batch_count, image_height, image_width, depth});
    test::FillValues<float>(&image, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});

    // The filter matrix is:
    // | 1 | 4 | 7 |
    // | 2 | 5 | 8 |
    // | 3 | 6 | 9 |
    const int filter_size = 3;
    const int filter_count = 1;
    Tensor filter(DT_FLOAT, {filter_size, filter_size, depth, filter_count});
    test::FillValues<float>(&filter, {1, 4, 7, 2, 5, 8, 3, 6, 9});

    AddInputFromArray<float>(image.shape(), image.flat<float>());
    AddInputFromArray<float>(filter.shape(), filter.flat<float>());
    TF_ASSERT_OK(RunOpKernel());

    // We're sliding the 3x3 filter across the 3x4 image, with accesses outside
    // the input set to zero because we're using the 'SAME' padding mode.
    // The calculations behind the expected output are:
    // (1*0)+(4*0)+(7*0)+(2*0)+(5*1)+(8*2)+(3*0)+(6*5)+(9*6)=105
    // (1*0)+(4*0)+(7*0)+(2*1)+(5*2)+(8*3)+(3*5)+(6*6)+(9*7)=150
    // (1*0)+(4*0)+(7*0)+(2*2)+(5*3)+(8*4)+(3*6)+(6*7)+(9*8)=183
    // (1*0)+(4*0)+(7*0)+(2*3)+(5*4)+(8*0)+(3*7)+(6*8)+(9*0)=95
    // (1*0)+(4*1)+(7*2)+(2*0)+(5*5)+(8*6)+(3*0)+(6*9)+(9*10)=235
    // (1*1)+(4*2)+(7*3)+(2*5)+(5*6)+(8*7)+(3*9)+(6*10)+(9*11)=312
    // (1*2)+(4*3)+(7*4)+(2*6)+(5*7)+(8*8)+(3*10)+(6*11)+(9*12)=357
    // (1*3)+(4*4)+(7*0)+(2*7)+(5*8)+(8*0)+(3*11)+(6*12)+(9*0)=178
    // (1*0)+(4*5)+(7*6)+(2*0)+(5*9)+(8*10)+(3*0)+(6*0)+(9*0)=187
    // (1*5)+(4*6)+(7*7)+(2*9)+(5*10)+(8*11)+(3*0)+(6*0)+(9*0)=234
    // (1*6)+(4*7)+(7*8)+(2*10)+(5*11)+(8*12)+(3*0)+(6*0)+(9*0)=261
    // (1*7)+(4*8)+(7*0)+(2*11)+(5*12)+(8*0)+(3*0)+(6*0)+(9*0)=121
    // This means we should end up with this matrix:
    // |  105  |  150  |  183  |   95  |
    // |  235  |  312  |  357  |  178  |
    // |  187  |  234  |  261  |  121  |
    const int expected_width = image_width;
    const int expected_height = image_height * filter_count;
    Tensor expected(DT_FLOAT, TensorShape({image_batch_count, expected_height,
                                           expected_width, filter_count}));
    test::FillValues<float>(
        &expected, {105, 150, 183, 95, 235, 312, 357, 178, 187, 234, 261, 121});
    const Tensor& output = *GetOutput(0);
    test::ExpectTensorNear<float>(expected, output, 1e-5);
  }

  void AnisotropicStrides() {
    const int stride_width = 3;
    const int stride_height = 1;
    TF_EXPECT_OK(NodeDefBuilder("conv_op", "Conv2D")
                     .Input(FakeInput(DT_FLOAT))
                     .Input(FakeInput(DT_FLOAT))
                     .Attr("T", DT_FLOAT)
                     .Attr("strides", {1, stride_height, stride_width, 1})
                     .Attr("padding", "VALID")
                     .Finalize(node_def()));
    TF_EXPECT_OK(InitOp());
    const int depth = 1;
    const int image_width = 6;
    const int image_height = 3;
    const int image_batch_count = 1;
    Tensor image(DT_FLOAT,
                 {image_batch_count, image_height, image_width, depth});
    test::FillValues<float>(&image, {
                                        3, 2, 1, -1, -2, -3,  //
                                        4, 3, 2, -2, -3, -4,  //
                                        5, 4, 3, -3, -4, -5,  //
                                    });
    const int filter_size = 2;
    const int filter_count = 1;
    Tensor filter(DT_FLOAT, {filter_size, filter_size, depth, filter_count});
    test::FillValues<float>(&filter, {
                                         1, 2,  //
                                         3, 4,  //
                                     });

    AddInputFromArray<float>(image.shape(), image.flat<float>());
    AddInputFromArray<float>(filter.shape(), filter.flat<float>());
    TF_ASSERT_OK(RunOpKernel());

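    // With 'VALID' padding, a 2x2 filter, stride 1 vertically and stride 3
    // horizontally, the output is 2x2. A sketch of the expected values:
    // (1*3)+(2*2)+(3*4)+(4*3)=31      (1*-1)+(2*-2)+(3*-2)+(4*-3)=-23
    // (1*4)+(2*3)+(3*5)+(4*4)=41      (1*-2)+(2*-3)+(3*-3)+(4*-4)=-33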
    const int expected_width = 2;
    const int expected_height = 2;
    Tensor expected(DT_FLOAT, TensorShape({image_batch_count, expected_height,
                                           expected_width, filter_count}));
    test::FillValues<float>(&expected, {31, -23, 41, -33});
    const Tensor& output = *GetOutput(0);
    test::ExpectTensorNear<float>(expected, output, 1e-5);
  }
};

TEST_F(ConvOpTest, HandwrittenConv) { HandwrittenConv(); }

TEST_F(ConvOpTest, AnisotropicStride) { AnisotropicStrides(); }

template <typename T>
class FusedConv2DOpTest : public OpsTestBase {
 protected:
  static constexpr int kDepth = 3;
  static constexpr int kImageWidth = 32;
  static constexpr int kImageHeight = 32;
  static constexpr int kImageBatchCount = 8;

  using BiasAddGraphRunner =
      std::function<void(const Tensor& input_data, const Tensor& filter_data,
                         const Tensor& bias_data, Tensor* out)>;

  using BatchNormGraphRunner = std::function<void(
      const Tensor& input_data, const Tensor& filter_data,
      const Tensor& scale_data, const Tensor& offset_data,
      const Tensor& mean_data, const Tensor& variance_data, Tensor* out)>;

  // Runs a TensorFlow graph defined by the root scope, and fetches the result
  // of the 'fetch' node into the output Tensor. The optional `fetch_node`
  // parameter allows defining a fetch node directly with a NodeDef, for ops
  // that are not supported by the C++ API.
  void RunAndFetch(const tensorflow::Scope& root, const string& fetch,
                   Tensor* output, bool allow_gpu_device,
                   const NodeDef* fetch_node = nullptr) {
    tensorflow::GraphDef graph;
    TF_ASSERT_OK(root.ToGraphDef(&graph));

    if (fetch_node) {
      *graph.add_node() = *fetch_node;
    }

    // We really want to make sure that the graph executes exactly as we passed
    // it to the session, so we disable various optimizations.
    tensorflow::SessionOptions session_options;

    // Disable common runtime constant folding.
    session_options.config.mutable_graph_options()
        ->mutable_optimizer_options()
        ->set_opt_level(OptimizerOptions::L0);

    // Disable Grappler optimizations for tests.
    tensorflow::RewriterConfig* cfg =
        session_options.config.mutable_graph_options()
            ->mutable_rewrite_options();
    cfg->set_constant_folding(tensorflow::RewriterConfig::OFF);
    cfg->set_layout_optimizer(tensorflow::RewriterConfig::OFF);
    cfg->set_remapping(tensorflow::RewriterConfig::OFF);

    std::unique_ptr<tensorflow::Session> session(
        tensorflow::NewSession(session_options));

    std::vector<DeviceAttributes> available_devices;
    TF_ASSERT_OK(session->ListDevices(&available_devices))
        << "Failed to get available session devices";

    // Check if session has an available GPU device.
    const bool has_gpu_device =
        absl::c_any_of(available_devices, [](const DeviceAttributes& device) {
          return device.device_type() == DEVICE_GPU;
        });

    // Some of the `FusedConv2D` fusion types are implemented only for CPU, and
    // in this test we don't want to compare GPU vs CPU numbers, so place all
    // nodes on CPU in this case.
    const bool place_all_on_gpu = allow_gpu_device && has_gpu_device;

    const string device = place_all_on_gpu ? "/device:GPU:0" : "/device:CPU:0";
    for (NodeDef& mutable_node : *graph.mutable_node()) {
      mutable_node.set_device(device);
    }

    TF_ASSERT_OK(session->Create(graph));

    std::vector<Tensor> unfused_tensors;
    TF_ASSERT_OK(session->Run({}, {fetch}, {}, &unfused_tensors));

    *output = unfused_tensors[0];
  }

  void RunConv2DWithBias(const Tensor& input_data, const Tensor& filter_data,
                         const Tensor& bias_data, const std::string& padding,
                         const std::vector<int>& explicit_paddings,
                         Tensor* output, bool allow_gpu_device = false,
                         int stride = 1) {
    Scope root = tensorflow::Scope::NewRootScope();

    ops::Conv2D conv = ops::Conv2D(
        root.WithOpName("conv"),
        ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
        ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
        {1, stride, stride, 1}, padding,
        ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));

    ops::BiasAdd with_bias = ops::BiasAdd(
        root.WithOpName("with_bias"), conv,
        ops::Const(root.WithOpName("bias"), Input::Initializer(bias_data)));

    RunAndFetch(root, "with_bias", output, allow_gpu_device);
  }

  void RunConv2DWithBiasAndActivation(
      const Tensor& input_data, const Tensor& filter_data,
      const Tensor& bias_data, const string& activation_type,
      const std::string& padding, const std::vector<int>& explicit_paddings,
      Tensor* output, bool allow_gpu_device = false, int stride = 1) {
    Scope root = tensorflow::Scope::NewRootScope();

    ops::Conv2D conv = ops::Conv2D(
        root.WithOpName("conv"),
        ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
        ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
        {1, stride, stride, 1}, padding,
        ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));

    ops::BiasAdd with_bias = ops::BiasAdd(
        root.WithOpName("with_bias"), conv,
        ops::Const(root.WithOpName("bias"), Input::Initializer(bias_data)));

    if (activation_type == "Relu") {
      ops::Relu(root.WithOpName("with_activation"), with_bias);
    } else if (activation_type == "Relu6") {
      ops::Relu6(root.WithOpName("with_activation"), with_bias);
    } else if (activation_type == "Elu") {
      ops::Elu(root.WithOpName("with_activation"), with_bias);
    } else if (activation_type == "LeakyRelu") {
      ops::internal::LeakyRelu(root.WithOpName("with_activation"), with_bias);
    } else {
      ops::Identity(root.WithOpName("with_activation"), with_bias);
    }

    RunAndFetch(root, "with_activation", output, allow_gpu_device);
  }

  void RunConv2DWithBatchNorm(
      const Tensor& input_data, const Tensor& filter_data,
      const Tensor& scale_data, const Tensor& offset_data,
      const Tensor& mean_data, const Tensor& variance_data,
      const std::string& padding, const std::vector<int>& explicit_paddings,
      Tensor* output, bool allow_gpu_device = false, int stride = 1) {
    Scope root = tensorflow::Scope::NewRootScope();

    ops::Conv2D conv = ops::Conv2D(
        root.WithOpName("conv"),
        ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
        ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
        {1, stride, stride, 1}, padding,
        ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));

    ops::FusedBatchNorm::Attrs attr;
    attr = attr.IsTraining(false);

    ops::FusedBatchNorm with_fused_batch_norm = ops::FusedBatchNorm(
        root.WithOpName("with_fused_batch_norm"), conv,
        ops::Const(root.WithOpName("scale"), Input::Initializer(scale_data)),
        ops::Const(root.WithOpName("offset"), Input::Initializer(offset_data)),
        ops::Const(root.WithOpName("mean"), Input::Initializer(mean_data)),
        ops::Const(root.WithOpName("var"), Input::Initializer(variance_data)),
        attr);

    RunAndFetch(root, "with_fused_batch_norm", output, allow_gpu_device);
  }

  void RunConv2DWithBatchNormAndActivation(
      const Tensor& input_data, const Tensor& filter_data,
      const Tensor& scale_data, const Tensor& offset_data,
      const Tensor& mean_data, const Tensor& variance_data,
      const string& activation_type, const std::string& padding,
      const std::vector<int>& explicit_paddings, Tensor* output,
      bool allow_gpu_device = false, int stride = 1) {
    Scope root = tensorflow::Scope::NewRootScope();

    ops::Conv2D conv = ops::Conv2D(
        root.WithOpName("conv"),
        ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
        ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
        {1, stride, stride, 1}, padding,
        ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));

    ops::FusedBatchNorm::Attrs attr;
    attr = attr.IsTraining(false);

    ops::FusedBatchNorm with_fused_batch_norm = ops::FusedBatchNorm(
        root.WithOpName("with_fused_batch_norm"), conv,
        ops::Const(root.WithOpName("scale"), Input::Initializer(scale_data)),
        ops::Const(root.WithOpName("offset"), Input::Initializer(offset_data)),
        ops::Const(root.WithOpName("mean"), Input::Initializer(mean_data)),
        ops::Const(root.WithOpName("var"), Input::Initializer(variance_data)),
        attr);

    if (activation_type == "Relu") {
      ops::Relu(root.WithOpName("with_activation"), with_fused_batch_norm.y);
    } else if (activation_type == "Relu6") {
      ops::Relu6(root.WithOpName("with_activation"), with_fused_batch_norm.y);
    } else if (activation_type == "Elu") {
      ops::Elu(root.WithOpName("with_activation"), with_fused_batch_norm.y);
    } else if (activation_type == "LeakyRelu") {
      ops::internal::LeakyRelu(root.WithOpName("with_activation"),
                               with_fused_batch_norm.y);
    } else {
      ops::Identity(root.WithOpName("with_activation"),
                    with_fused_batch_norm.y);
    }

    RunAndFetch(root, "with_activation", output, allow_gpu_device);
  }

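  // Builds and runs a _FusedConv2D node directly from a NodeDef (see
  // RunAndFetch), since _FusedConv2D is not exposed through the C++ ops API.
  // `args_data` carries the side inputs for the fusion (e.g. the bias, or the
  // batch norm scale/offset/mean/variance) in the order expected by
  // `fused_ops`.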
  void RunFusedConv2DOp(const Tensor& input_data, const Tensor& filter_data,
                        const std::vector<Tensor>& args_data,
                        const std::vector<string>& fused_ops,
                        const std::string& padding,
                        const std::vector<int>& explicit_paddings,
                        Tensor* output, bool allow_gpu_device = false,
                        int stride = 1) {
    Scope root = tensorflow::Scope::NewRootScope();

    DataType dtype = DataTypeToEnum<T>::v();
    int num_args = static_cast<int>(args_data.size());

    Output input =
        ops::Const(root.WithOpName("input"), Input::Initializer(input_data));
    Output filter =
        ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data));

    std::vector<NodeDefBuilder::NodeOut> args;
    for (int i = 0; i < num_args; ++i) {
      Output arg = ops::Const(root.WithOpName(absl::StrCat("arg", i)),
                              Input::Initializer(args_data[i]));
      args.emplace_back(arg.name(), 0, dtype);
    }

    NodeDef fused_conv2d;
    TF_EXPECT_OK(NodeDefBuilder("fused_conv", "_FusedConv2D")
                     .Input({input.name(), 0, dtype})
                     .Input({filter.name(), 0, dtype})
                     .Input(args)
                     .Attr("num_args", num_args)
                     .Attr("T", dtype)
                     .Attr("strides", {1, stride, stride, 1})
                     .Attr("padding", padding)
                     .Attr("explicit_paddings", explicit_paddings)
                     .Attr("fused_ops", fused_ops)
                     .Finalize(&fused_conv2d));

    RunAndFetch(root, fused_conv2d.name(), output, allow_gpu_device,
                &fused_conv2d);
  }

  void VerifyBiasAddTensorsNear(int depth, int image_width, int image_height,
                                int image_batch_count, int filter_size,
                                int filter_count,
                                const BiasAddGraphRunner& run_default,
                                const BiasAddGraphRunner& run_fused) {
    DataType dtype = DataTypeToEnum<T>::v();

    Tensor image(dtype, {image_batch_count, image_height, image_width, depth});
    image.flat<T>() = image.flat<T>().setRandom();

    // Add some negative values to filter to properly test Relu.
    Tensor filter(dtype, {filter_size, filter_size, depth, filter_count});
    filter.flat<T>() = filter.flat<T>().setRandom();
    filter.flat<T>() -= filter.flat<T>().constant(static_cast<T>(0.5f));

    const int bias_size = filter_count;
    Tensor bias(dtype, {bias_size});
    bias.flat<T>() = bias.flat<T>().setRandom();
    bias.flat<T>() += bias.flat<T>().constant(static_cast<T>(0.5f));

    Tensor conv_2d;
    Tensor fused_conv_2d;

    run_default(image, filter, bias, &conv_2d);
    run_fused(image, filter, bias, &fused_conv_2d);

    ASSERT_EQ(conv_2d.dtype(), fused_conv_2d.dtype());
    ASSERT_EQ(conv_2d.shape(), fused_conv_2d.shape());

    // NOTE(intel-tf): When filter_size is equal to the input image size,
    // conv2d is essentially an element-wise multiplication followed by a full
    // sum reduction, which causes larger numerical error than in typical
    // cases.
    if (image_width == filter_size && image_height == filter_size) {
      test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-4);
    } else {
      test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-5);
    }
  }

  void VerifyFusedBatchNormTensorsNear(int depth, int image_width,
                                       int image_height, int image_batch_count,
                                       int filter_size, int filter_count,
                                       const BatchNormGraphRunner& run_default,
                                       const BatchNormGraphRunner& run_fused) {
    DataType dtype = DataTypeToEnum<T>::v();

    Tensor image(dtype, {image_batch_count, image_height, image_width, depth});
    image.flat<T>() = image.flat<T>().setRandom();

    // Add some negative values to filter to properly test Relu.
    Tensor filter(dtype, {filter_size, filter_size, depth, filter_count});
    filter.flat<T>() = filter.flat<T>().setRandom();
    filter.flat<T>() -= filter.flat<T>().constant(static_cast<T>(0.5f));

    const int scale_size = filter_count;

    Tensor scale(dtype, {scale_size});
    scale.flat<T>() = scale.flat<T>().setRandom();

    Tensor offset(dtype, {scale_size});
    offset.flat<T>() = offset.flat<T>().setRandom();

    Tensor mean(dtype, {scale_size});
    mean.flat<T>() = mean.flat<T>().setRandom();

    Tensor variance(dtype, {scale_size});
    variance.flat<T>() = variance.flat<T>().setRandom();
    variance.flat<T>() += variance.flat<T>().constant(static_cast<T>(0.5f));

    Tensor conv_2d;
    Tensor fused_conv_2d;

    run_default(image, filter, scale, offset, mean, variance, &conv_2d);
    run_fused(image, filter, scale, offset, mean, variance, &fused_conv_2d);

    ASSERT_EQ(conv_2d.dtype(), fused_conv_2d.dtype());
    ASSERT_EQ(conv_2d.shape(), fused_conv_2d.shape());

    // NOTE(intel-tf): When filter_size is equal to the input image size,
    // conv2d is essentially an element-wise multiplication followed by a full
    // sum reduction, which causes larger numerical error than in typical
    // cases.
    if (image_width == filter_size && image_height == filter_size) {
      test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-4);
    } else {
      test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-5);
    }
  }

  // Verifies that computing Conv2D+BiasAdd in a graph is identical to
  // FusedConv2D.
  void VerifyConv2DWithBias(int filter_size, int filter_count,
                            const std::vector<int>& explicit_paddings = {},
                            int depth = kDepth, int image_width = kImageWidth,
                            int image_height = kImageHeight,
                            int image_batch_count = kImageBatchCount) {
    std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
    const BiasAddGraphRunner run_default =
        [this, &explicit_paddings, padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& bias_data, Tensor* out) {
          RunConv2DWithBias(input_data, filter_data, bias_data, padding,
                            explicit_paddings, out);
        };

    const BiasAddGraphRunner run_fused =
        [this, explicit_paddings, padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& bias_data, Tensor* out) {
          RunFusedConv2DOp(input_data, filter_data, {bias_data}, {"BiasAdd"},
                           padding, explicit_paddings, out);
        };

    VerifyBiasAddTensorsNear(depth, image_width, image_height,
                             image_batch_count, filter_size, filter_count,
                             run_default, run_fused);
  }

  // Verifies that computing Conv2D+BiasAdd+{Activation} in a graph is identical
  // to FusedConv2D.
  void VerifyConv2DWithBiasAndActivation(
      const string& activation, int filter_size, int filter_count,
      const std::vector<int>& explicit_paddings = {}, int depth = kDepth,
      int image_width = kImageWidth, int image_height = kImageHeight,
      int image_batch_count = kImageBatchCount) {
    std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
    const BiasAddGraphRunner run_default =
        [this, &activation, &explicit_paddings, &padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& bias_data, Tensor* out) {
          RunConv2DWithBiasAndActivation(
              input_data, filter_data, bias_data, activation, padding,
              explicit_paddings, out,
              /*allow_gpu_device=*/activation == "Relu");
        };

    const BiasAddGraphRunner run_fused = [this, &activation, &explicit_paddings,
                                          padding](const Tensor& input_data,
                                                   const Tensor& filter_data,
                                                   const Tensor& bias_data,
                                                   Tensor* out) {
      RunFusedConv2DOp(input_data, filter_data, {bias_data},
                       {"BiasAdd", activation}, padding, explicit_paddings, out,
                       /*allow_gpu_device=*/activation == "Relu");
    };

    VerifyBiasAddTensorsNear(depth, image_width, image_height,
                             image_batch_count, filter_size, filter_count,
                             run_default, run_fused);
  }

  // Verifies that computing Conv2D+FusedBatchNorm in a graph is identical to
  // FusedConv2D.
  void VerifyConv2DWithBatchNorm(int filter_size, int filter_count,
                                 const std::vector<int>& explicit_paddings = {},
                                 int depth = kDepth,
                                 int image_width = kImageWidth,
                                 int image_height = kImageHeight,
                                 int image_batch_count = kImageBatchCount) {
    std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
    const BatchNormGraphRunner run_default =
        [this, explicit_paddings, padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& scale_data, const Tensor& offset_data,
            const Tensor& mean_data, const Tensor& variance_data, Tensor* out) {
          RunConv2DWithBatchNorm(input_data, filter_data, scale_data,
                                 offset_data, mean_data, variance_data, padding,
                                 explicit_paddings, out);
        };

    const BatchNormGraphRunner run_fused =
        [this, explicit_paddings, padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& scale_data, const Tensor& offset_data,
            const Tensor& mean_data, const Tensor& variance_data, Tensor* out) {
          RunFusedConv2DOp(input_data, filter_data,
                           {scale_data, offset_data, mean_data, variance_data},
                           {"FusedBatchNorm"}, padding, explicit_paddings, out);
        };

    VerifyFusedBatchNormTensorsNear(depth, image_width, image_height,
                                    image_batch_count, filter_size,
                                    filter_count, run_default, run_fused);
  }

  // Verifies that computing Conv2D+FusedBatchNorm+{Activation} in a graph is
  // identical to FusedConv2D.
  void VerifyConv2DWithBatchNormAndActivation(
      const string& activation, int filter_size, int filter_count,
      const std::vector<int>& explicit_paddings = {}, int depth = kDepth,
      int image_width = kImageWidth, int image_height = kImageHeight,
      int image_batch_count = kImageBatchCount) {
    std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
    const BatchNormGraphRunner run_default =
        [this, &activation, explicit_paddings, padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& scale_data, const Tensor& offset_data,
            const Tensor& mean_data, const Tensor& variance_data, Tensor* out) {
          RunConv2DWithBatchNormAndActivation(
              input_data, filter_data, scale_data, offset_data, mean_data,
              variance_data, activation, padding, explicit_paddings, out);
        };

    const BatchNormGraphRunner run_fused =
        [this, &activation, explicit_paddings, padding](
            const Tensor& input_data, const Tensor& filter_data,
            const Tensor& scale_data, const Tensor& offset_data,
            const Tensor& mean_data, const Tensor& variance_data, Tensor* out) {
          RunFusedConv2DOp(input_data, filter_data,
                           {scale_data, offset_data, mean_data, variance_data},
                           {"FusedBatchNorm", activation}, padding,
                           explicit_paddings, out);
        };

    VerifyFusedBatchNormTensorsNear(depth, image_width, image_height,
                                    image_batch_count, filter_size,
                                    filter_count, run_default, run_fused);
  }
};

// Conv2D with BatchNorm can be tested only with `T=float`, because the default
// `FusedBatchNorm` kernel supports only floats for scale, mean, and variance.

template <typename T>
class FusedConv2DWithBiasOpTest : public FusedConv2DOpTest<T> {};
template <typename T>
class FusedConv2DWithBatchNormOpTest : public FusedConv2DOpTest<T> {};

TYPED_TEST_SUITE_P(FusedConv2DWithBiasOpTest);
TYPED_TEST_SUITE_P(FusedConv2DWithBatchNormOpTest);

// ROCm does not yet support the _FusedConv2D op, so disable tests that check
// _FusedConv2D when building with ROCm.

#ifndef TENSORFLOW_USE_ROCM
// -------------------------------------------------------------------------- //
// Conv2D + BiasAdd + {Activation}                                            //
// -------------------------------------------------------------------------- //

TYPED_TEST_P(FusedConv2DWithBiasOpTest, OneByOneConvolution) {
  const int filter_size = 1;
  const int filter_count = 12;
  this->VerifyConv2DWithBias(filter_size, filter_count);
}

TYPED_TEST_P(FusedConv2DWithBiasOpTest, ImageSizeConvolution) {
  const int filter_size = TestFixture::kImageWidth;
  const int filter_count = 12;
  this->VerifyConv2DWithBias(filter_size, filter_count);
}

TYPED_TEST_P(FusedConv2DWithBiasOpTest, SpatialConvolution) {
  const int filter_size = 3;
  const int filter_count = 12;
  this->VerifyConv2DWithBias(filter_size, filter_count);
}

#ifndef INTEL_MKL
TYPED_TEST_P(FusedConv2DWithBiasOpTest, ExplicitPaddingConvolution) {
  const int filter_size = 3;
  const int filter_count = 12;
  this->VerifyConv2DWithBias(filter_size, filter_count,
                             /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
}
#endif

TYPED_TEST_P(FusedConv2DWithBiasOpTest, OneByOneConvolutionAndActivation) {
  // Requires full precision Conv2D op
  tensorflow::enable_tensor_float_32_execution(false);
  const int filter_size = 1;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBiasAndActivation(activation, filter_size,
                                            filter_count);
  }
}

TYPED_TEST_P(FusedConv2DWithBiasOpTest, ImageSizeConvolutionAndActivation) {
  const int filter_size = TestFixture::kImageWidth;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBiasAndActivation(activation, filter_size,
                                            filter_count);
  }
}

TYPED_TEST_P(FusedConv2DWithBiasOpTest, SpatialConvolutionAndActivation) {
  const int filter_size = 3;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBiasAndActivation(activation, filter_size,
                                            filter_count);
  }
}

#ifndef INTEL_MKL
TYPED_TEST_P(FusedConv2DWithBiasOpTest,
             ExplicitPaddingConvolutionAndActivation) {
  const int filter_size = 3;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBiasAndActivation(
        activation, filter_size, filter_count,
        /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
  }
}
#endif

// -------------------------------------------------------------------------- //
// Conv2D + FusedBatchNorm + {Activation}                                     //
// -------------------------------------------------------------------------- //

TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, OneByOneConvolution) {
  const int filter_size = 1;
  const int filter_count = 12;
  this->VerifyConv2DWithBatchNorm(filter_size, filter_count);
}

TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, ImageSizeConvolution) {
  const int filter_size = TestFixture::kImageWidth;
  const int filter_count = 12;
  this->VerifyConv2DWithBatchNorm(filter_size, filter_count);
}

TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, SpatialConvolution) {
  const int filter_size = 3;
  const int filter_count = 12;
  this->VerifyConv2DWithBatchNorm(filter_size, filter_count);
}

#ifndef INTEL_MKL
TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, ExplicitPaddingConvolution) {
  const int filter_size = 3;
  const int filter_count = 12;
  this->VerifyConv2DWithBatchNorm(
      filter_size, filter_count,
      /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
}
#endif

TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, OneByOneConvolutionAndActivation) {
  const int filter_size = 1;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBatchNormAndActivation(activation, filter_size,
                                                 filter_count);
  }
}

TYPED_TEST_P(FusedConv2DWithBatchNormOpTest,
             ImageSizeConvolutionAndActivation) {
  const int filter_size = TestFixture::kImageWidth;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBatchNormAndActivation(activation, filter_size,
                                                 filter_count);
  }
}

TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, SpatialConvolutionAndActivation) {
  const int filter_size = 3;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBatchNormAndActivation(activation, filter_size,
                                                 filter_count);
  }
}

#ifndef INTEL_MKL
TYPED_TEST_P(FusedConv2DWithBatchNormOpTest,
             ExplicitPaddingConvolutionAndActivation) {
  const int filter_size = 3;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBatchNormAndActivation(
        activation, filter_size, filter_count,
        /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
  }
}
#endif

#ifndef INTEL_MKL
REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBiasOpTest,          //
                            OneByOneConvolution,                //
                            ImageSizeConvolution,               //
                            SpatialConvolution,                 //
                            ExplicitPaddingConvolution,         //
                            OneByOneConvolutionAndActivation,   //
                            ImageSizeConvolutionAndActivation,  //
                            SpatialConvolutionAndActivation,    //
                            ExplicitPaddingConvolutionAndActivation);

REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBatchNormOpTest,     //
                            OneByOneConvolution,                //
                            ImageSizeConvolution,               //
                            SpatialConvolution,                 //
                            ExplicitPaddingConvolution,         //
                            OneByOneConvolutionAndActivation,   //
                            ImageSizeConvolutionAndActivation,  //
                            SpatialConvolutionAndActivation,    //
                            ExplicitPaddingConvolutionAndActivation);
#else
REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBiasOpTest,          //
                            OneByOneConvolution,                //
                            ImageSizeConvolution,               //
                            SpatialConvolution,                 //
                            OneByOneConvolutionAndActivation,   //
                            ImageSizeConvolutionAndActivation,  //
                            SpatialConvolutionAndActivation);

REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBatchNormOpTest,     //
                            OneByOneConvolution,                //
                            ImageSizeConvolution,               //
                            SpatialConvolution,                 //
                            OneByOneConvolutionAndActivation,   //
                            ImageSizeConvolutionAndActivation,  //
                            SpatialConvolutionAndActivation);
#endif

using FusedBiasAddDataTypes = ::testing::Types<float, double>;
INSTANTIATE_TYPED_TEST_SUITE_P(Test, FusedConv2DWithBiasOpTest,
                               FusedBiasAddDataTypes);

using FusedBatchNormDataTypes = ::testing::Types<float>;
INSTANTIATE_TYPED_TEST_SUITE_P(Test, FusedConv2DWithBatchNormOpTest,
                               FusedBatchNormDataTypes);

#endif  // TENSORFLOW_USE_ROCM
}  // namespace tensorflow