1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include <string>
17 #include <vector>
18
19 #include "absl/algorithm/container.h"
20 #include "tensorflow/cc/ops/const_op.h"
21 #include "tensorflow/cc/ops/image_ops.h"
22 #include "tensorflow/cc/ops/nn_ops.h"
23 #include "tensorflow/cc/ops/nn_ops_internal.h"
24 #include "tensorflow/cc/ops/standard_ops.h"
25 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
26 #include "tensorflow/core/framework/fake_input.h"
27 #include "tensorflow/core/framework/node_def_builder.h"
28 #include "tensorflow/core/framework/tensor.h"
29 #include "tensorflow/core/framework/types.pb.h"
30 #include "tensorflow/core/kernels/conv_ops_gpu.h"
31 #include "tensorflow/core/kernels/ops_testutil.h"
32 #include "tensorflow/core/kernels/ops_util.h"
33 #include "tensorflow/core/lib/core/status_test_util.h"
34 #include "tensorflow/core/platform/tensor_float_32_utils.h"
35 #include "tensorflow/core/platform/test.h"
36 #include "tensorflow/core/platform/test_benchmark.h"
37 #include "tensorflow/core/protobuf/rewriter_config.pb.h"
38 #include "tensorflow/core/public/session.h"
39
40 namespace tensorflow {
41
// Exercises the fused "resize bilinear + mirror pad + Conv2D" kernels
// (FusedResizeAndPadConv2D and FusedPadConv2D), both against hand-computed
// values and against the equivalent chains of separate ops.
class FusedResizePadConvOpTest : public OpsTestBase {
 protected:
  // Runs FusedResizeAndPadConv2D with an identity resize and all-zero padding
  // on a hand-written 1x3x4x1 image and a 3x3 filter, so the fused op reduces
  // to a plain SAME convolution, and checks the output against values worked
  // out by hand below.
  template <typename T>
  void HandwrittenConv(DataType dtype) {
    const int stride = 1;
    TF_EXPECT_OK(NodeDefBuilder("fused_resize_op", "FusedResizeAndPadConv2D")
                     .Input(FakeInput(dtype))
                     .Input(FakeInput(DT_INT32))
                     .Input(FakeInput(DT_INT32))
                     .Input(FakeInput(dtype))
                     .Attr("T", dtype)
                     .Attr("resize_align_corners", false)
                     .Attr("mode", "REFLECT")
                     .Attr("strides", {1, stride, stride, 1})
                     .Attr("padding", "SAME")
                     .Finalize(node_def()));
    TF_EXPECT_OK(InitOp());
    const int depth = 1;
    const int image_width = 4;
    const int image_height = 3;
    const int image_batch_count = 1;
    // The image matrix is:
    // | 1 | 2 | 3 | 4 |
    // | 5 | 6 | 7 | 8 |
    // | 9 | 10 | 11 | 12 |
    Tensor image(dtype, {image_batch_count, image_height, image_width, depth});
    test::FillValues<T>(&image, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});

    // The filter matrix is:
    // | 1 | 4 | 7 |
    // | 2 | 5 | 8 |
    // | 3 | 6 | 9 |
    const int filter_size = 3;
    const int filter_count = 1;
    Tensor filter(dtype, {filter_size, filter_size, depth, filter_count});
    test::FillValues<T>(&filter, {1, 4, 7, 2, 5, 8, 3, 6, 9});

    // Resize to the input's own size and pad by zero on every edge, making
    // the resize and pad stages no-ops.
    const int resized_width = image_width;
    const int resized_height = image_height;

    const int top_padding = 0;
    const int bottom_padding = 0;
    const int left_padding = 0;
    const int right_padding = 0;

    AddInputFromArray<T>(image.shape(), image.flat<T>());
    AddInputFromArray<int32>(TensorShape({2}), {resized_height, resized_width});
    AddInputFromArray<int32>(
        TensorShape({4, 2}),
        {0, 0, top_padding, bottom_padding, left_padding, right_padding, 0, 0});
    AddInputFromArray<T>(filter.shape(), filter.flat<T>());
    TF_ASSERT_OK(RunOpKernel());

    // We're sliding the 3x3 filter across the 3x4 image, with accesses outside
    // the input set to zero because we're using the 'SAME' padding mode.
    // The calculations behind the expected output are:
    // (1*0)+(4*0)+(7*0)+(2*0)+(5*1)+(8*2)+(3*0)+(6*5)+(9*6)=105
    // (1*0)+(4*0)+(7*0)+(2*1)+(5*2)+(8*3)+(3*5)+(6*6)+(9*7)=150
    // (1*0)+(4*0)+(7*0)+(2*2)+(5*3)+(8*4)+(3*6)+(6*7)+(9*8)=183
    // (1*0)+(4*0)+(7*0)+(2*3)+(5*4)+(8*0)+(3*7)+(6*8)+(9*0)=95
    // (1*0)+(4*1)+(7*2)+(2*0)+(5*5)+(8*6)+(3*0)+(6*9)+(9*10)=235
    // (1*1)+(4*2)+(7*3)+(2*5)+(5*6)+(8*7)+(3*9)+(6*10)+(9*11)=312
    // (1*2)+(4*3)+(7*4)+(2*6)+(5*7)+(8*8)+(3*10)+(6*11)+(9*12)=357
    // (1*3)+(4*4)+(7*0)+(2*7)+(5*8)+(8*0)+(3*11)+(6*12)+(9*0)=178
    // (1*0)+(4*5)+(7*6)+(2*0)+(5*9)+(8*10)+(3*0)+(6*0)+(9*0)=187
    // (1*5)+(4*6)+(7*7)+(2*9)+(5*10)+(8*11)+(3*0)+(6*0)+(9*0)=234
    // (1*6)+(4*7)+(7*8)+(2*10)+(5*11)+(8*12)+(3*0)+(6*0)+(9*0)=261
    // (1*7)+(4*8)+(7*0)+(2*11)+(5*12)+(8*0)+(3*0)+(6*0)+(9*0)=121
    // This means we should end up with this matrix:
    // | 105 | 150 | 183 | 95 |
    // | 235 | 312 | 357 | 178 |
    // | 187 | 234 | 261 | 121 |
    const int expected_width = image_width;
    const int expected_height = image_height * filter_count;
    Tensor expected(dtype, TensorShape({image_batch_count, expected_height,
                                        expected_width, filter_count}));
    test::FillValues<T>(
        &expected, {105, 150, 183, 95, 235, 312, 357, 178, 187, 234, 261, 121});
    const Tensor& output = *GetOutput(0);
    test::ExpectTensorNear<T>(expected, output, 1e-5);
  }

  // Builds two graphs over the same iota-filled input — ResizeBilinear ->
  // MirrorPad -> Conv2D as separate ops, and the single
  // FusedResizeAndPadConv2D op — runs both in one session, and checks that
  // the two outputs are close.
  template <typename T>
  void CompareFusedAndSeparate(int input_width, int input_height,
                               int input_depth, int resize_width,
                               int resize_height, int y_padding, int x_padding,
                               int filter_size, int filter_count,
                               bool resize_align_corners,
                               const string& pad_mode, int stride,
                               const string& padding, DataType dtype) {
    Scope root = tensorflow::Scope::NewRootScope();
    using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)

    Tensor input_data(DT_FLOAT,
                      TensorShape({1, input_height, input_width, input_depth}));
    test::FillIota<float>(&input_data, 1.0f);
    Output input =
        Const(root.WithOpName("input"), Input::Initializer(input_data));
    Output casted_input = Cast(root.WithOpName("casted_input"), input, dtype);

    Tensor filter_data(DT_FLOAT, TensorShape({filter_size, filter_size,
                                              input_depth, filter_count}));
    test::FillIota<float>(&filter_data, 1.0f);
    Output filter =
        Const(root.WithOpName("filter"), Input::Initializer(filter_data));
    Output casted_filter =
        Cast(root.WithOpName("casted_filter"), filter, dtype);

    Output resize_size =
        Const(root.WithOpName("resize_size"), {resize_height, resize_width});
    Output resize =
        ResizeBilinear(root.WithOpName("resize"), input, resize_size,
                       ResizeBilinear::AlignCorners(resize_align_corners));
    // Bilinear resize only outputs float; cast it to dtype to match the input.
    Output casted_resize = Cast(root.WithOpName("cast"), resize, dtype);
    Output paddings =
        Const(root.WithOpName("paddings"),
              {{0, 0}, {y_padding, y_padding}, {x_padding, x_padding}, {0, 0}});
    Output mirror_pad = MirrorPad(root.WithOpName("mirror_pad"), casted_resize,
                                  paddings, pad_mode);
    Output conv = Conv2D(root.WithOpName("conv"), mirror_pad, casted_filter,
                         {1, stride, stride, 1}, padding);

    Output fused_conv = FusedResizeAndPadConv2D(
        root.WithOpName("fused_conv"), casted_input, resize_size, paddings,
        casted_filter, pad_mode, {1, stride, stride, 1}, padding,
        FusedResizeAndPadConv2D::ResizeAlignCorners(resize_align_corners));

    tensorflow::GraphDef graph;
    TF_ASSERT_OK(root.ToGraphDef(&graph));

    std::unique_ptr<tensorflow::Session> session(
        tensorflow::NewSession(tensorflow::SessionOptions()));
    TF_ASSERT_OK(session->Create(graph));

    std::vector<Tensor> unfused_tensors;
    TF_ASSERT_OK(session->Run({}, {"conv"}, {}, &unfused_tensors));

    std::vector<Tensor> fused_tensors;
    TF_ASSERT_OK(session->Run({}, {"fused_conv"}, {}, &fused_tensors));

    test::ExpectClose(unfused_tensors[0], fused_tensors[0]);
  }

  // Same as CompareFusedAndSeparate but with no resize step: compares
  // MirrorPad -> Conv2D against the single FusedPadConv2D op.
  template <typename T>
  void CompareFusedPadOnlyAndSeparate(int input_width, int input_height,
                                      int input_depth, int y_padding,
                                      int x_padding, int filter_size,
                                      int filter_count, const string& pad_mode,
                                      int stride, const string& padding,
                                      DataType dtype) {
    Scope root = tensorflow::Scope::NewRootScope();
    using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)

    Tensor input_data(DT_FLOAT,
                      TensorShape({1, input_height, input_width, input_depth}));
    test::FillIota<float>(&input_data, 1.0f);
    Output input =
        Const(root.WithOpName("input"), Input::Initializer(input_data));
    Output casted_input = Cast(root.WithOpName("casted_input"), input, dtype);

    Tensor filter_data(DT_FLOAT, TensorShape({filter_size, filter_size,
                                              input_depth, filter_count}));
    test::FillIota<float>(&filter_data, 1.0f);
    Output filter =
        Const(root.WithOpName("filter"), Input::Initializer(filter_data));
    Output casted_filter =
        Cast(root.WithOpName("casted_filter"), filter, dtype);

    Output paddings =
        Const(root.WithOpName("paddings"),
              {{0, 0}, {y_padding, y_padding}, {x_padding, x_padding}, {0, 0}});
    Output mirror_pad = MirrorPad(root.WithOpName("mirror_pad"), casted_input,
                                  paddings, pad_mode);
    Output conv = Conv2D(root.WithOpName("conv"), mirror_pad, casted_filter,
                         {1, stride, stride, 1}, padding);

    Output fused_conv = FusedPadConv2D(
        root.WithOpName("fused_conv"), casted_input, paddings, casted_filter,
        pad_mode, {1, stride, stride, 1}, padding);

    tensorflow::GraphDef graph;
    TF_ASSERT_OK(root.ToGraphDef(&graph));

    std::unique_ptr<tensorflow::Session> session(
        tensorflow::NewSession(tensorflow::SessionOptions()));
    TF_ASSERT_OK(session->Create(graph));

    std::vector<Tensor> unfused_tensors;
    TF_ASSERT_OK(session->Run({}, {"conv"}, {}, &unfused_tensors));

    std::vector<Tensor> fused_tensors;
    TF_ASSERT_OK(session->Run({}, {"fused_conv"}, {}, &fused_tensors));

    test::ExpectClose(unfused_tensors[0], fused_tensors[0]);
  }
};
239
// Hand-verified outputs for each supported element type.
TEST_F(FusedResizePadConvOpTest, HandwrittenConvHalf) {
  HandwrittenConv<Eigen::half>(DT_HALF);
}

TEST_F(FusedResizePadConvOpTest, HandwrittenConvFloat) {
  HandwrittenConv<float>(DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, HandwrittenConvDouble) {
  HandwrittenConv<double>(DT_DOUBLE);
}

// Identity configuration (no-op resize, zero padding, 1x1 filter) across the
// supported element types.
TEST_F(FusedResizePadConvOpTest, IdentityComparativeHalf) {
  CompareFusedAndSeparate<Eigen::half>(10, 10, 1, 10, 10, 0, 0, 1, 1, false,
                                       "REFLECT", 1, "SAME", DT_HALF);
}

TEST_F(FusedResizePadConvOpTest, IdentityComparativeFloat) {
  CompareFusedAndSeparate<float>(10, 10, 1, 10, 10, 0, 0, 1, 1, false,
                                 "REFLECT", 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, IdentityComparativeDouble) {
  CompareFusedAndSeparate<double>(10, 10, 1, 10, 10, 0, 0, 1, 1, false,
                                  "REFLECT", 1, "SAME", DT_DOUBLE);
}

// Exercises just the convolution stage (4x4 filter, multi-channel input).
TEST_F(FusedResizePadConvOpTest, ConvOnlyComparative) {
  CompareFusedAndSeparate<float>(10, 10, 3, 10, 10, 0, 0, 4, 4, false,
                                 "REFLECT", 1, "SAME", DT_FLOAT);
}

// Exercises just the resize stage (10x10 -> 20x20 upscale).
TEST_F(FusedResizePadConvOpTest, ResizeOnlyComparative) {
  CompareFusedAndSeparate<float>(10, 10, 1, 20, 20, 0, 0, 1, 1, false,
                                 "REFLECT", 1, "SAME", DT_FLOAT);
}

// Combined resize + convolution, with and without align_corners, strides, and
// VALID padding variants.
TEST_F(FusedResizePadConvOpTest, ResizeAndConvComparative) {
  CompareFusedAndSeparate<float>(2, 2, 4, 4, 2, 0, 0, 2, 2, false, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAlignAndConvComparative) {
  CompareFusedAndSeparate<float>(2, 2, 4, 4, 2, 0, 0, 2, 2, true, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAndConvStridedComparative) {
  CompareFusedAndSeparate<float>(2, 2, 4, 4, 2, 0, 0, 2, 2, false, "REFLECT", 2,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAlignAndConvValidComparative) {
  CompareFusedAndSeparate<float>(2, 2, 4, 4, 2, 0, 0, 2, 2, true, "REFLECT", 1,
                                 "VALID", DT_FLOAT);
}

// Exercises just the mirror-pad stage (REFLECT mode).
TEST_F(FusedResizePadConvOpTest, PadOnlyComparative) {
  CompareFusedAndSeparate<float>(4, 4, 1, 4, 4, 2, 2, 1, 1, false, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, PadOnlyWithChannelsComparative) {
  CompareFusedAndSeparate<float>(4, 4, 3, 4, 4, 2, 2, 1, 1, false, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAndPadComparative) {
  CompareFusedAndSeparate<float>(4, 4, 1, 6, 6, 2, 2, 1, 1, false, "REFLECT", 1,
                                 "SAME", DT_FLOAT);
}

// SYMMETRIC pad mode variants, including a large input to shake out
// size-dependent bugs.
TEST_F(FusedResizePadConvOpTest, PadOnlySymmetricComparative) {
  CompareFusedAndSeparate<float>(4, 4, 1, 4, 4, 2, 2, 1, 1, false, "SYMMETRIC",
                                 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAndPadSymmetricComparative) {
  CompareFusedAndSeparate<float>(4, 4, 3, 6, 6, 2, 2, 1, 1, false, "SYMMETRIC",
                                 1, "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, ResizeAndPadSymmetricComparativeLarge) {
  CompareFusedAndSeparate<float>(1000, 1000, 3, 1006, 1006, 2, 2, 1, 1, false,
                                 "SYMMETRIC", 1, "SAME", DT_FLOAT);
}

// FusedPadConv2D (no resize stage) variants, mirroring the cases above.
TEST_F(FusedResizePadConvOpTest, NoResizeIdentityComparativeHalf) {
  CompareFusedPadOnlyAndSeparate<Eigen::half>(10, 10, 1, 0, 0, 1, 1, "REFLECT",
                                              1, "SAME", DT_HALF);
}

TEST_F(FusedResizePadConvOpTest, NoResizeIdentityComparativeFloat) {
  CompareFusedPadOnlyAndSeparate<float>(10, 10, 1, 0, 0, 1, 1, "REFLECT", 1,
                                        "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, NoResizeIdentityComparativeDouble) {
  CompareFusedPadOnlyAndSeparate<double>(10, 10, 1, 0, 0, 1, 1, "REFLECT", 1,
                                         "SAME", DT_DOUBLE);
}

TEST_F(FusedResizePadConvOpTest, NoResizeConvOnlyComparative) {
  CompareFusedPadOnlyAndSeparate<float>(10, 10, 3, 0, 0, 4, 4, "REFLECT", 1,
                                        "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, NoResizePadOnlyComparative) {
  CompareFusedPadOnlyAndSeparate<float>(4, 4, 1, 2, 2, 1, 1, "REFLECT", 1,
                                        "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, NoResizePadOnlyWithChannelsComparative) {
  CompareFusedPadOnlyAndSeparate<float>(4, 4, 3, 2, 2, 1, 1, "REFLECT", 1,
                                        "SAME", DT_FLOAT);
}

TEST_F(FusedResizePadConvOpTest, NoResizePadOnlySymmetricComparative) {
  CompareFusedPadOnlyAndSeparate<float>(4, 4, 1, 2, 2, 1, 1, "SYMMETRIC", 1,
                                        "SAME", DT_FLOAT);
}
361
362 class ConvOpTest : public OpsTestBase {
363 protected:
HandwrittenConv()364 void HandwrittenConv() {
365 const int stride = 1;
366 TF_EXPECT_OK(NodeDefBuilder("conv_op", "Conv2D")
367 .Input(FakeInput(DT_FLOAT))
368 .Input(FakeInput(DT_FLOAT))
369 .Attr("T", DT_FLOAT)
370 .Attr("strides", {1, stride, stride, 1})
371 .Attr("padding", "SAME")
372 .Finalize(node_def()));
373 TF_EXPECT_OK(InitOp());
374 const int depth = 1;
375 const int image_width = 4;
376 const int image_height = 3;
377 const int image_batch_count = 1;
378 // The image matrix is:
379 // | 1 | 2 | 3 | 4 |
380 // | 5 | 6 | 7 | 8 |
381 // | 9 | 10 | 11 | 12 |
382 Tensor image(DT_FLOAT,
383 {image_batch_count, image_height, image_width, depth});
384 test::FillValues<float>(&image, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
385
386 // The filter matrix is:
387 // | 1 | 4 | 7 |
388 // | 2 | 5 | 8 |
389 // | 3 | 6 | 9 |
390 const int filter_size = 3;
391 const int filter_count = 1;
392 Tensor filter(DT_FLOAT, {filter_size, filter_size, depth, filter_count});
393 test::FillValues<float>(&filter, {1, 4, 7, 2, 5, 8, 3, 6, 9});
394
395 AddInputFromArray<float>(image.shape(), image.flat<float>());
396 AddInputFromArray<float>(filter.shape(), filter.flat<float>());
397 TF_ASSERT_OK(RunOpKernel());
398
399 // We're sliding the 3x3 filter across the 3x4 image, with accesses outside
400 // the input set to zero because we're using the 'SAME' padding mode.
401 // The calculations behind the expected output are:
402 // (1*0)+(4*0)+(7*0)+(2*0)+(5*1)+(8*2)+(3*0)+(6*5)+(9*6)=105
403 // (1*0)+(4*0)+(7*0)+(2*1)+(5*2)+(8*3)+(3*5)+(6*6)+(9*7)=150
404 // (1*0)+(4*0)+(7*0)+(2*2)+(5*3)+(8*4)+(3*6)+(6*7)+(9*8)=183
405 // (1*0)+(4*0)+(7*0)+(2*3)+(5*4)+(8*0)+(3*7)+(6*8)+(9*0)=95
406 // (1*0)+(4*1)+(7*2)+(2*0)+(5*5)+(8*6)+(3*0)+(6*9)+(9*10)=235
407 // (1*1)+(4*2)+(7*3)+(2*5)+(5*6)+(8*7)+(3*9)+(6*10)+(9*11)=312
408 // (1*2)+(4*3)+(7*4)+(2*6)+(5*7)+(8*8)+(3*10)+(6*11)+(9*12)=357
409 // (1*3)+(4*4)+(7*0)+(2*7)+(5*8)+(8*0)+(3*11)+(6*12)+(9*0)=178
410 // (1*0)+(4*5)+(7*6)+(2*0)+(5*9)+(8*10)+(3*0)+(6*0)+(9*0)=187
411 // (1*5)+(4*6)+(7*7)+(2*9)+(5*10)+(8*11)+(3*0)+(6*0)+(9*0)=234
412 // (1*6)+(4*7)+(7*8)+(2*10)+(5*11)+(8*12)+(3*0)+(6*0)+(9*0)=261
413 // (1*7)+(4*8)+(7*0)+(2*11)+(5*12)+(8*0)+(3*0)+(6*0)+(9*0)=121
414 // This means we should end up with this matrix:
415 // | 105 | 150 | 183 | 95 |
416 // | 235 | 312 | 357 | 178 |
417 // | 187 | 234 | 261 | 121 |
418 const int expected_width = image_width;
419 const int expected_height = image_height * filter_count;
420 Tensor expected(DT_FLOAT, TensorShape({image_batch_count, expected_height,
421 expected_width, filter_count}));
422 test::FillValues<float>(
423 &expected, {105, 150, 183, 95, 235, 312, 357, 178, 187, 234, 261, 121});
424 const Tensor& output = *GetOutput(0);
425 test::ExpectTensorNear<float>(expected, output, 1e-5);
426 }
427
AnisotropicStrides()428 void AnisotropicStrides() {
429 const int stride_width = 3;
430 const int stride_height = 1;
431 TF_EXPECT_OK(NodeDefBuilder("conv_op", "Conv2D")
432 .Input(FakeInput(DT_FLOAT))
433 .Input(FakeInput(DT_FLOAT))
434 .Attr("T", DT_FLOAT)
435 .Attr("strides", {1, stride_height, stride_width, 1})
436 .Attr("padding", "VALID")
437 .Finalize(node_def()));
438 TF_EXPECT_OK(InitOp());
439 const int depth = 1;
440 const int image_width = 6;
441 const int image_height = 3;
442 const int image_batch_count = 1;
443 Tensor image(DT_FLOAT,
444 {image_batch_count, image_height, image_width, depth});
445 test::FillValues<float>(&image, {
446 3, 2, 1, -1, -2, -3, //
447 4, 3, 2, -2, -3, -4, //
448 5, 4, 3, -3, -4, -5, //
449 });
450 const int filter_size = 2;
451 const int filter_count = 1;
452 Tensor filter(DT_FLOAT, {filter_size, filter_size, depth, filter_count});
453 test::FillValues<float>(&filter, {
454 1, 2, //
455 3, 4, //
456 });
457
458 AddInputFromArray<float>(image.shape(), image.flat<float>());
459 AddInputFromArray<float>(filter.shape(), filter.flat<float>());
460 TF_ASSERT_OK(RunOpKernel());
461
462 const int expected_width = 2;
463 const int expected_height = 2;
464 Tensor expected(DT_FLOAT, TensorShape({image_batch_count, expected_height,
465 expected_width, filter_count}));
466 test::FillValues<float>(&expected, {31, -23, 41, -33});
467 const Tensor& output = *GetOutput(0);
468 test::ExpectTensorNear<float>(expected, output, 1e-5);
469 }
470 };
471
// Instantiates the hand-verified Conv2D checks defined on ConvOpTest.
TEST_F(ConvOpTest, HandwrittenConv) { HandwrittenConv(); }

TEST_F(ConvOpTest, AnisotropicStride) { AnisotropicStrides(); }
475
// Fixture parameterized over the element type T. Compares the output of a
// single _FusedConv2D node against the equivalent graph of separate ops
// (Conv2D + BiasAdd or FusedBatchNorm, optionally followed by an activation).
template <typename T>
class FusedConv2DOpTest : public OpsTestBase {
 protected:
  // Default input geometry shared by the comparison helpers.
  static constexpr int kDepth = 3;
  static constexpr int kImageWidth = 32;
  static constexpr int kImageHeight = 32;
  static constexpr int kImageBatchCount = 8;

  // Callback that builds and runs a Conv2D+BiasAdd-style graph, writing the
  // result into `out`.
  using BiasAddGraphRunner =
      std::function<void(const Tensor& input_data, const Tensor& filter_data,
                         const Tensor& bias_data, Tensor* out)>;

  // Callback that builds and runs a Conv2D+FusedBatchNorm-style graph,
  // writing the result into `out`.
  using BatchNormGraphRunner = std::function<void(
      const Tensor& input_data, const Tensor& filter_data,
      const Tensor& scale_data, const Tensor& offset_data,
      const Tensor& mean_data, const Tensor& variance_data, Tensor* out)>;
492
  // Runs a Tensorflow graph defined by the root scope, and fetches the result
  // of 'fetch' node into the output Tensor. Optional `fetch_node` parameter
  // allows to define a fetch node directly using a NodeDef for the ops that are
  // not supported by the C++ Api.
  void RunAndFetch(const tensorflow::Scope& root, const string& fetch,
                   Tensor* output, bool allow_gpu_device,
                   const NodeDef* fetch_node = nullptr) {
    tensorflow::GraphDef graph;
    TF_ASSERT_OK(root.ToGraphDef(&graph));

    if (fetch_node) {
      *graph.add_node() = *fetch_node;
    }

    // We really want to make sure that graph executed exactly as we passed it
    // to the session, so we disable various optimizations.
    tensorflow::SessionOptions session_options;

    // Disable common runtime constant folding.
    session_options.config.mutable_graph_options()
        ->mutable_optimizer_options()
        ->set_opt_level(OptimizerOptions::L0);

    // Disable Grappler optimizations for tests.
    tensorflow::RewriterConfig* cfg =
        session_options.config.mutable_graph_options()
            ->mutable_rewrite_options();
    cfg->set_constant_folding(tensorflow::RewriterConfig::OFF);
    cfg->set_layout_optimizer(tensorflow::RewriterConfig::OFF);
    cfg->set_remapping(tensorflow::RewriterConfig::OFF);

    std::unique_ptr<tensorflow::Session> session(
        tensorflow::NewSession(session_options));

    std::vector<DeviceAttributes> available_devices;
    TF_ASSERT_OK(session->ListDevices(&available_devices))
        << "Failed to get available session devices";

    // Check if session has an available GPU device.
    const bool has_gpu_device =
        absl::c_any_of(available_devices, [](const DeviceAttributes& device) {
          return device.device_type() == DEVICE_GPU;
        });

    // Some of the `FusedConv2D` fusion types are implemented only for CPU, and
    // in this test we don't want to compare GPU vs CPU numbers, so nodes are
    // placed on GPU only when the caller allows it AND the session actually
    // has a GPU device; otherwise everything is placed on CPU.
    const bool place_all_on_gpu = allow_gpu_device && has_gpu_device;

    // Pin every node explicitly so placement cannot split the graph across
    // devices.
    const string device = place_all_on_gpu ? "/device:GPU:0" : "/device:CPU:0";
    for (NodeDef& mutable_node : *graph.mutable_node()) {
      mutable_node.set_device(device);
    }

    TF_ASSERT_OK(session->Create(graph));

    std::vector<Tensor> unfused_tensors;
    TF_ASSERT_OK(session->Run({}, {fetch}, {}, &unfused_tensors));

    *output = unfused_tensors[0];
  }
554
RunConv2DWithBias(const Tensor & input_data,const Tensor & filter_data,const Tensor & bias_data,const std::string & padding,const std::vector<int> & explicit_paddings,Tensor * output,bool allow_gpu_device=false,int stride=1)555 void RunConv2DWithBias(const Tensor& input_data, const Tensor& filter_data,
556 const Tensor& bias_data, const std::string& padding,
557 const std::vector<int>& explicit_paddings,
558 Tensor* output, bool allow_gpu_device = false,
559 int stride = 1) {
560 Scope root = tensorflow::Scope::NewRootScope();
561
562 ops::Conv2D conv = ops::Conv2D(
563 root.WithOpName("conv"),
564 ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
565 ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
566 {1, stride, stride, 1}, padding,
567 ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));
568
569 ops::BiasAdd with_bias = ops::BiasAdd(
570 root.WithOpName("with_bias"), conv,
571 ops::Const(root.WithOpName("bias"), Input::Initializer(bias_data)));
572
573 RunAndFetch(root, "with_bias", output, allow_gpu_device);
574 }
575
RunConv2DWithBiasAndActivation(const Tensor & input_data,const Tensor & filter_data,const Tensor & bias_data,const string & activation_type,const std::string & padding,const std::vector<int> & explicit_paddings,Tensor * output,bool allow_gpu_device=false,int stride=1)576 void RunConv2DWithBiasAndActivation(
577 const Tensor& input_data, const Tensor& filter_data,
578 const Tensor& bias_data, const string& activation_type,
579 const std::string& padding, const std::vector<int>& explicit_paddings,
580 Tensor* output, bool allow_gpu_device = false, int stride = 1) {
581 Scope root = tensorflow::Scope::NewRootScope();
582
583 ops::Conv2D conv = ops::Conv2D(
584 root.WithOpName("conv"),
585 ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
586 ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
587 {1, stride, stride, 1}, padding,
588 ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));
589
590 ops::BiasAdd with_bias = ops::BiasAdd(
591 root.WithOpName("with_bias"), conv,
592 ops::Const(root.WithOpName("bias"), Input::Initializer(bias_data)));
593
594 if (activation_type == "Relu") {
595 ops::Relu(root.WithOpName("with_activation"), with_bias);
596 } else if (activation_type == "Relu6") {
597 ops::Relu6(root.WithOpName("with_activation"), with_bias);
598 } else if (activation_type == "Elu") {
599 ops::Elu(root.WithOpName("with_activation"), with_bias);
600 } else if (activation_type == "LeakyRelu") {
601 ops::internal::LeakyRelu(root.WithOpName("with_activation"), with_bias);
602 } else {
603 ops::Identity(root.WithOpName("with_activation"), with_bias);
604 }
605
606 RunAndFetch(root, "with_activation", output, allow_gpu_device);
607 }
608
RunConv2DWithBatchNorm(const Tensor & input_data,const Tensor & filter_data,const Tensor & scale_data,const Tensor & offset_data,const Tensor & mean_data,const Tensor & variance_data,const std::string & padding,const std::vector<int> & explicit_paddings,Tensor * output,bool allow_gpu_device=false,int stride=1)609 void RunConv2DWithBatchNorm(
610 const Tensor& input_data, const Tensor& filter_data,
611 const Tensor& scale_data, const Tensor& offset_data,
612 const Tensor& mean_data, const Tensor& variance_data,
613 const std::string& padding, const std::vector<int>& explicit_paddings,
614 Tensor* output, bool allow_gpu_device = false, int stride = 1) {
615 Scope root = tensorflow::Scope::NewRootScope();
616
617 ops::Conv2D conv = ops::Conv2D(
618 root.WithOpName("conv"),
619 ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
620 ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
621 {1, stride, stride, 1}, padding,
622 ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));
623
624 ops::FusedBatchNorm::Attrs attr;
625 attr = attr.IsTraining(false);
626
627 ops::FusedBatchNorm with_fused_batch_norm = ops::FusedBatchNorm(
628 root.WithOpName("with_fused_batch_norm"), conv,
629 ops::Const(root.WithOpName("scale"), Input::Initializer(scale_data)),
630 ops::Const(root.WithOpName("offset"), Input::Initializer(offset_data)),
631 ops::Const(root.WithOpName("mean"), Input::Initializer(mean_data)),
632 ops::Const(root.WithOpName("var"), Input::Initializer(variance_data)),
633 attr);
634
635 RunAndFetch(root, "with_fused_batch_norm", output, allow_gpu_device);
636 }
637
RunConv2DWithBatchNormAndActivation(const Tensor & input_data,const Tensor & filter_data,const Tensor & scale_data,const Tensor & offset_data,const Tensor & mean_data,const Tensor & variance_data,const string & activation_type,const std::string & padding,const std::vector<int> & explicit_paddings,Tensor * output,bool allow_gpu_device=false,int stride=1)638 void RunConv2DWithBatchNormAndActivation(
639 const Tensor& input_data, const Tensor& filter_data,
640 const Tensor& scale_data, const Tensor& offset_data,
641 const Tensor& mean_data, const Tensor& variance_data,
642 const string& activation_type, const std::string& padding,
643 const std::vector<int>& explicit_paddings, Tensor* output,
644 bool allow_gpu_device = false, int stride = 1) {
645 Scope root = tensorflow::Scope::NewRootScope();
646
647 ops::Conv2D conv = ops::Conv2D(
648 root.WithOpName("conv"),
649 ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
650 ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
651 {1, stride, stride, 1}, padding,
652 ops::Conv2D::Attrs().ExplicitPaddings(explicit_paddings));
653
654 ops::FusedBatchNorm::Attrs attr;
655 attr = attr.IsTraining(false);
656
657 ops::FusedBatchNorm with_fused_batch_norm = ops::FusedBatchNorm(
658 root.WithOpName("with_fused_batch_norm"), conv,
659 ops::Const(root.WithOpName("scale"), Input::Initializer(scale_data)),
660 ops::Const(root.WithOpName("offset"), Input::Initializer(offset_data)),
661 ops::Const(root.WithOpName("mean"), Input::Initializer(mean_data)),
662 ops::Const(root.WithOpName("var"), Input::Initializer(variance_data)),
663 attr);
664
665 if (activation_type == "Relu") {
666 ops::Relu(root.WithOpName("with_activation"), with_fused_batch_norm.y);
667 } else if (activation_type == "Relu6") {
668 ops::Relu6(root.WithOpName("with_activation"), with_fused_batch_norm.y);
669 } else if (activation_type == "Elu") {
670 ops::Elu(root.WithOpName("with_activation"), with_fused_batch_norm.y);
671 } else if (activation_type == "LeakyRelu") {
672 ops::internal::LeakyRelu(root.WithOpName("with_activation"),
673 with_fused_batch_norm.y);
674 } else {
675 ops::Identity(root.WithOpName("with_activation"),
676 with_fused_batch_norm.y);
677 }
678
679 RunAndFetch(root, "with_activation", output, allow_gpu_device);
680 }
681
  // Builds and runs a graph containing a single `_FusedConv2D` node and
  // writes the fetched result to `output`.
  //
  // `fused_ops` selects the epilogue fused into the Conv2D kernel (this file
  // uses e.g. {"BiasAdd"}, {"BiasAdd", <activation>}, {"FusedBatchNorm"},
  // {"FusedBatchNorm", <activation>}). `args_data` supplies the extra tensors
  // those fused ops consume, in order (bias for BiasAdd; scale/offset/mean/
  // variance for FusedBatchNorm). `explicit_paddings` is only meaningful when
  // `padding == "EXPLICIT"`. `allow_gpu_device` and the finalized NodeDef are
  // forwarded to RunAndFetch.
  void RunFusedConv2DOp(const Tensor& input_data, const Tensor& filter_data,
                        const std::vector<Tensor>& args_data,
                        const std::vector<string>& fused_ops,
                        const std::string& padding,
                        const std::vector<int>& explicit_paddings,
                        Tensor* output, bool allow_gpu_device = false,
                        int stride = 1) {
    Scope root = tensorflow::Scope::NewRootScope();

    DataType dtype = DataTypeToEnum<T>::v();
    int num_args = static_cast<int>(args_data.size());

    Output input =
        ops::Const(root.WithOpName("input"), Input::Initializer(input_data));
    Output filter =
        ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data));

    // Materialize each extra argument as a Const node ("arg0", "arg1", ...)
    // and collect NodeOuts so they can be wired into the variadic `args`
    // input of the fused node below.
    std::vector<NodeDefBuilder::NodeOut> args;
    for (int i = 0; i < num_args; ++i) {
      Output arg = ops::Const(root.WithOpName(absl::StrCat("arg", i)),
                              Input::Initializer(args_data[i]));
      args.emplace_back(arg.name(), 0, dtype);
    }

    // The _FusedConv2D NodeDef is assembled manually via NodeDefBuilder
    // (strides use NHWC layout: {1, stride, stride, 1}).
    NodeDef fused_conv2d;
    TF_EXPECT_OK(NodeDefBuilder("fused_conv", "_FusedConv2D")
                     .Input({input.name(), 0, dtype})
                     .Input({filter.name(), 0, dtype})
                     .Input(args)
                     .Attr("num_args", num_args)
                     .Attr("T", dtype)
                     .Attr("strides", {1, stride, stride, 1})
                     .Attr("padding", padding)
                     .Attr("explicit_paddings", explicit_paddings)
                     .Attr("fused_ops", fused_ops)
                     .Finalize(&fused_conv2d));

    RunAndFetch(root, fused_conv2d.name(), output, allow_gpu_device,
                &fused_conv2d);
  }
722
VerifyBiasAddTensorsNear(int depth,int image_width,int image_height,int image_batch_count,int filter_size,int filter_count,const BiasAddGraphRunner & run_default,const BiasAddGraphRunner & run_fused)723 void VerifyBiasAddTensorsNear(int depth, int image_width, int image_height,
724 int image_batch_count, int filter_size,
725 int filter_count,
726 const BiasAddGraphRunner& run_default,
727 const BiasAddGraphRunner& run_fused) {
728 DataType dtype = DataTypeToEnum<T>::v();
729
730 Tensor image(dtype, {image_batch_count, image_height, image_width, depth});
731 image.flat<T>() = image.flat<T>().setRandom();
732
733 // Add some negative values to filter to properly test Relu.
734 Tensor filter(dtype, {filter_size, filter_size, depth, filter_count});
735 filter.flat<T>() = filter.flat<T>().setRandom();
736 filter.flat<T>() -= filter.flat<T>().constant(static_cast<T>(0.5f));
737
738 const int bias_size = filter_count;
739 Tensor bias(dtype, {bias_size});
740 bias.flat<T>() = bias.flat<T>().setRandom();
741 bias.flat<T>() += bias.flat<T>().constant(static_cast<T>(0.5f));
742
743 Tensor conv_2d;
744 Tensor fused_conv_2d;
745
746 run_default(image, filter, bias, &conv_2d);
747 run_fused(image, filter, bias, &fused_conv_2d);
748
749 ASSERT_EQ(conv_2d.dtype(), fused_conv_2d.dtype());
750 ASSERT_EQ(conv_2d.shape(), fused_conv_2d.shape());
751
752 // NOTE(intel-tf): When filter_size is equal to the input image size,
753 // conv2d essentially is element-wise multiplication followed by
754 // a full sum reduction, which causes larger numerical error
755 // than usual cases.
756 if (image_width == filter_size && image_height == filter_size) {
757 test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-4);
758 } else {
759 test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-5);
760 }
761 }
762
VerifyFusedBatchNormTensorsNear(int depth,int image_width,int image_height,int image_batch_count,int filter_size,int filter_count,const BatchNormGraphRunner & run_default,const BatchNormGraphRunner & run_fused)763 void VerifyFusedBatchNormTensorsNear(int depth, int image_width,
764 int image_height, int image_batch_count,
765 int filter_size, int filter_count,
766 const BatchNormGraphRunner& run_default,
767 const BatchNormGraphRunner& run_fused) {
768 DataType dtype = DataTypeToEnum<T>::v();
769
770 Tensor image(dtype, {image_batch_count, image_height, image_width, depth});
771 image.flat<T>() = image.flat<T>().setRandom();
772
773 // Add some negative values to filter to properly test Relu.
774 Tensor filter(dtype, {filter_size, filter_size, depth, filter_count});
775 filter.flat<T>() = filter.flat<T>().setRandom();
776 filter.flat<T>() -= filter.flat<T>().constant(static_cast<T>(0.5f));
777
778 const int scale_size = filter_count;
779
780 Tensor scale(dtype, {scale_size});
781 scale.flat<T>() = scale.flat<T>().setRandom();
782
783 Tensor offset(dtype, {scale_size});
784 offset.flat<T>() = offset.flat<T>().setRandom();
785
786 Tensor mean(dtype, {scale_size});
787 mean.flat<T>() = mean.flat<T>().setRandom();
788
789 Tensor variance(dtype, {scale_size});
790 variance.flat<T>() = variance.flat<T>().setRandom();
791 variance.flat<T>() += variance.flat<T>().constant(static_cast<T>(0.5f));
792
793 Tensor conv_2d;
794 Tensor fused_conv_2d;
795
796 run_default(image, filter, scale, offset, mean, variance, &conv_2d);
797 run_fused(image, filter, scale, offset, mean, variance, &fused_conv_2d);
798
799 ASSERT_EQ(conv_2d.dtype(), fused_conv_2d.dtype());
800 ASSERT_EQ(conv_2d.shape(), fused_conv_2d.shape());
801
802 // NOTE(intel-tf): When filter_size is equal to the input image size,
803 // conv2d essentially is element-wise multiplication followed by
804 // a full sum reduction, which causes larger numerical error
805 // than usual cases.
806 if (image_width == filter_size && image_height == filter_size) {
807 test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-4);
808 } else {
809 test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-5);
810 }
811 }
812
813 // Verifies that computing Conv2D+BiasAdd in a graph is identical to
814 // FusedConv2D.
VerifyConv2DWithBias(int filter_size,int filter_count,const std::vector<int> & explicit_paddings={},int depth=kDepth,int image_width=kImageWidth,int image_height=kImageHeight,int image_batch_count=kImageBatchCount)815 void VerifyConv2DWithBias(int filter_size, int filter_count,
816 const std::vector<int>& explicit_paddings = {},
817 int depth = kDepth, int image_width = kImageWidth,
818 int image_height = kImageHeight,
819 int image_batch_count = kImageBatchCount) {
820 std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
821 const BiasAddGraphRunner run_default =
822 [this, &explicit_paddings, padding](
823 const Tensor& input_data, const Tensor& filter_data,
__anona74cd8200202( const Tensor& input_data, const Tensor& filter_data, const Tensor& bias_data, Tensor* out) 824 const Tensor& bias_data, Tensor* out) {
825 RunConv2DWithBias(input_data, filter_data, bias_data, padding,
826 explicit_paddings, out);
827 };
828
829 const BiasAddGraphRunner run_fused =
830 [this, explicit_paddings, padding](
831 const Tensor& input_data, const Tensor& filter_data,
__anona74cd8200302( const Tensor& input_data, const Tensor& filter_data, const Tensor& bias_data, Tensor* out) 832 const Tensor& bias_data, Tensor* out) {
833 RunFusedConv2DOp(input_data, filter_data, {bias_data}, {"BiasAdd"},
834 padding, explicit_paddings, out);
835 };
836
837 VerifyBiasAddTensorsNear(depth, image_width, image_height,
838 image_batch_count, filter_size, filter_count,
839 run_default, run_fused);
840 }
841
842 // Verifies that computing Conv2D+BiasAdd+{Activation} in a graph is identical
843 // to FusedConv2D.
VerifyConv2DWithBiasAndActivation(const string & activation,int filter_size,int filter_count,const std::vector<int> & explicit_paddings={},int depth=kDepth,int image_width=kImageWidth,int image_height=kImageHeight,int image_batch_count=kImageBatchCount)844 void VerifyConv2DWithBiasAndActivation(
845 const string& activation, int filter_size, int filter_count,
846 const std::vector<int>& explicit_paddings = {}, int depth = kDepth,
847 int image_width = kImageWidth, int image_height = kImageHeight,
848 int image_batch_count = kImageBatchCount) {
849 std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
850 const BiasAddGraphRunner run_default =
851 [this, &activation, &explicit_paddings, &padding](
852 const Tensor& input_data, const Tensor& filter_data,
__anona74cd8200402( const Tensor& input_data, const Tensor& filter_data, const Tensor& bias_data, Tensor* out) 853 const Tensor& bias_data, Tensor* out) {
854 RunConv2DWithBiasAndActivation(
855 input_data, filter_data, bias_data, activation, padding,
856 explicit_paddings, out,
857 /*allow_gpu_device=*/activation == "Relu");
858 };
859
860 const BiasAddGraphRunner run_fused = [this, &activation, &explicit_paddings,
861 padding](const Tensor& input_data,
862 const Tensor& filter_data,
863 const Tensor& bias_data,
__anona74cd8200502(const Tensor& input_data, const Tensor& filter_data, const Tensor& bias_data, Tensor* out) 864 Tensor* out) {
865 RunFusedConv2DOp(input_data, filter_data, {bias_data},
866 {"BiasAdd", activation}, padding, explicit_paddings, out,
867 /*allow_gpu_device=*/activation == "Relu");
868 };
869
870 VerifyBiasAddTensorsNear(depth, image_width, image_height,
871 image_batch_count, filter_size, filter_count,
872 run_default, run_fused);
873 }
874
875 // Verifies that computing Conv2D+FusedBatchNorm in a graph is identical to
876 // FusedConv2D.
VerifyConv2DWithBatchNorm(int filter_size,int filter_count,const std::vector<int> & explicit_paddings={},int depth=kDepth,int image_width=kImageWidth,int image_height=kImageHeight,int image_batch_count=kImageBatchCount)877 void VerifyConv2DWithBatchNorm(int filter_size, int filter_count,
878 const std::vector<int>& explicit_paddings = {},
879 int depth = kDepth,
880 int image_width = kImageWidth,
881 int image_height = kImageHeight,
882 int image_batch_count = kImageBatchCount) {
883 std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
884 const BatchNormGraphRunner run_default =
885 [this, explicit_paddings, padding](
886 const Tensor& input_data, const Tensor& filter_data,
887 const Tensor& scale_data, const Tensor& offset_data,
__anona74cd8200602( const Tensor& input_data, const Tensor& filter_data, const Tensor& scale_data, const Tensor& offset_data, const Tensor& mean_data, const Tensor& variance_data, Tensor* out) 888 const Tensor& mean_data, const Tensor& variance_data, Tensor* out) {
889 RunConv2DWithBatchNorm(input_data, filter_data, scale_data,
890 offset_data, mean_data, variance_data, padding,
891 explicit_paddings, out);
892 };
893
894 const BatchNormGraphRunner run_fused =
895 [this, explicit_paddings, padding](
896 const Tensor& input_data, const Tensor& filter_data,
897 const Tensor& scale_data, const Tensor& offset_data,
__anona74cd8200702( const Tensor& input_data, const Tensor& filter_data, const Tensor& scale_data, const Tensor& offset_data, const Tensor& mean_data, const Tensor& variance_data, Tensor* out) 898 const Tensor& mean_data, const Tensor& variance_data, Tensor* out) {
899 RunFusedConv2DOp(input_data, filter_data,
900 {scale_data, offset_data, mean_data, variance_data},
901 {"FusedBatchNorm"}, padding, explicit_paddings, out);
902 };
903
904 VerifyFusedBatchNormTensorsNear(depth, image_width, image_height,
905 image_batch_count, filter_size,
906 filter_count, run_default, run_fused);
907 }
908
909 // Verifies that computing Conv2D+FusedBatchNorm+{Activation} in a graph is
910 // identical to FusedConv2D.
VerifyConv2DWithBatchNormAndActivation(const string & activation,int filter_size,int filter_count,const std::vector<int> & explicit_paddings={},int depth=kDepth,int image_width=kImageWidth,int image_height=kImageHeight,int image_batch_count=kImageBatchCount)911 void VerifyConv2DWithBatchNormAndActivation(
912 const string& activation, int filter_size, int filter_count,
913 const std::vector<int>& explicit_paddings = {}, int depth = kDepth,
914 int image_width = kImageWidth, int image_height = kImageHeight,
915 int image_batch_count = kImageBatchCount) {
916 std::string padding = explicit_paddings.empty() ? "SAME" : "EXPLICIT";
917 const BatchNormGraphRunner run_default =
918 [this, &activation, explicit_paddings, padding](
919 const Tensor& input_data, const Tensor& filter_data,
920 const Tensor& scale_data, const Tensor& offset_data,
__anona74cd8200802( const Tensor& input_data, const Tensor& filter_data, const Tensor& scale_data, const Tensor& offset_data, const Tensor& mean_data, const Tensor& variance_data, Tensor* out) 921 const Tensor& mean_data, const Tensor& variance_data, Tensor* out) {
922 RunConv2DWithBatchNormAndActivation(
923 input_data, filter_data, scale_data, offset_data, mean_data,
924 variance_data, activation, padding, explicit_paddings, out);
925 };
926
927 const BatchNormGraphRunner run_fused =
928 [this, &activation, explicit_paddings, padding](
929 const Tensor& input_data, const Tensor& filter_data,
930 const Tensor& scale_data, const Tensor& offset_data,
__anona74cd8200902( const Tensor& input_data, const Tensor& filter_data, const Tensor& scale_data, const Tensor& offset_data, const Tensor& mean_data, const Tensor& variance_data, Tensor* out) 931 const Tensor& mean_data, const Tensor& variance_data, Tensor* out) {
932 RunFusedConv2DOp(input_data, filter_data,
933 {scale_data, offset_data, mean_data, variance_data},
934 {"FusedBatchNorm", activation}, padding,
935 explicit_paddings, out);
936 };
937
938 VerifyFusedBatchNormTensorsNear(depth, image_width, image_height,
939 image_batch_count, filter_size,
940 filter_count, run_default, run_fused);
941 }
942 };
943
// Conv2D with BatchNorm can be tested only with `T=float`, because default
// `FusedBatchNorm` kernel supports only floats for scale, mean and variance.

// Typed-test fixtures; the concrete element types are bound at the bottom of
// the file via INSTANTIATE_TYPED_TEST_SUITE_P.
template <typename T>
class FusedConv2DWithBiasOpTest : public FusedConv2DOpTest<T> {};
template <typename T>
class FusedConv2DWithBatchNormOpTest : public FusedConv2DOpTest<T> {};

// Declare the parameterized suites; the individual TYPED_TEST_P cases below
// are attached via REGISTER_TYPED_TEST_SUITE_P at the end of the file.
TYPED_TEST_SUITE_P(FusedConv2DWithBiasOpTest);
TYPED_TEST_SUITE_P(FusedConv2DWithBatchNormOpTest);
954
955 // ROCm does not yet support the _FusedConv2D op,
956 // Therefore disable tests that check _FusedConv2D, when building with ROCm
957
958 #ifndef TENSORFLOW_USE_ROCM
959 // -------------------------------------------------------------------------- //
960 // Conv2D + BiasAdd + {Activation} //
961 // -------------------------------------------------------------------------- //
962
// Conv2D+BiasAdd with a 1x1 filter and 12 output channels.
TYPED_TEST_P(FusedConv2DWithBiasOpTest, OneByOneConvolution) {
  this->VerifyConv2DWithBias(/*filter_size=*/1, /*filter_count=*/12);
}
968
// Conv2D+BiasAdd where the filter covers the whole image.
TYPED_TEST_P(FusedConv2DWithBiasOpTest, ImageSizeConvolution) {
  this->VerifyConv2DWithBias(/*filter_size=*/TestFixture::kImageWidth,
                             /*filter_count=*/12);
}
974
// Conv2D+BiasAdd with a 3x3 filter and 12 output channels.
TYPED_TEST_P(FusedConv2DWithBiasOpTest, SpatialConvolution) {
  this->VerifyConv2DWithBias(/*filter_size=*/3, /*filter_count=*/12);
}
980
981 #ifndef INTEL_MKL
TYPED_TEST_P(FusedConv2DWithBiasOpTest,ExplicitPaddingConvolution)982 TYPED_TEST_P(FusedConv2DWithBiasOpTest, ExplicitPaddingConvolution) {
983 const int filter_size = 3;
984 const int filter_count = 12;
985 this->VerifyConv2DWithBias(filter_size, filter_count,
986 /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
987 }
988 #endif
989
TYPED_TEST_P(FusedConv2DWithBiasOpTest, OneByOneConvolutionAndActivation) {
  // Requires full precision Conv2D op.
  // NOTE(review): this flips a process-wide flag and never restores it, so
  // every test that runs after this one in the same binary also executes with
  // TensorFloat-32 disabled — confirm that is intended before reordering or
  // sharding these tests.
  tensorflow::enable_tensor_float_32_execution(false);
  const int filter_size = 1;
  const int filter_count = 12;
  // Exercise each fusible activation supported by _FusedConv2D in this file.
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBiasAndActivation(activation, filter_size,
                                            filter_count);
  }
}
1000
// Conv2D+BiasAdd+{Activation} where the filter covers the whole image.
TYPED_TEST_P(FusedConv2DWithBiasOpTest, ImageSizeConvolutionAndActivation) {
  // The "Relu" variant may run on a GPU (see VerifyConv2DWithBiasAndActivation)
  // and requires a full precision Conv2D op. Disable TensorFloat-32 explicitly
  // here instead of relying on OneByOneConvolutionAndActivation having run
  // first and leaked the disabled state process-wide.
  tensorflow::enable_tensor_float_32_execution(false);
  const int filter_size = TestFixture::kImageWidth;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBiasAndActivation(activation, filter_size,
                                            filter_count);
  }
}
1009
// Conv2D+BiasAdd+{Activation} with a 3x3 filter.
TYPED_TEST_P(FusedConv2DWithBiasOpTest, SpatialConvolutionAndActivation) {
  // The "Relu" variant may run on a GPU (see VerifyConv2DWithBiasAndActivation)
  // and requires a full precision Conv2D op. Disable TensorFloat-32 explicitly
  // here instead of relying on OneByOneConvolutionAndActivation having run
  // first and leaked the disabled state process-wide.
  tensorflow::enable_tensor_float_32_execution(false);
  const int filter_size = 3;
  const int filter_count = 12;
  for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBiasAndActivation(activation, filter_size,
                                            filter_count);
  }
}
1018
1019 #ifndef INTEL_MKL
TYPED_TEST_P(FusedConv2DWithBiasOpTest,ExplicitPaddingConvolutionAndActivation)1020 TYPED_TEST_P(FusedConv2DWithBiasOpTest,
1021 ExplicitPaddingConvolutionAndActivation) {
1022 const int filter_size = 3;
1023 const int filter_count = 12;
1024 for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
1025 this->VerifyConv2DWithBiasAndActivation(
1026 activation, filter_size, filter_count,
1027 /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
1028 }
1029 }
1030 #endif
1031
1032 // -------------------------------------------------------------------------- //
1033 // Conv2D + FusedBatchNorm + {Activation} //
1034 // -------------------------------------------------------------------------- //
1035
// Conv2D+FusedBatchNorm with a 1x1 filter and 12 output channels.
TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, OneByOneConvolution) {
  this->VerifyConv2DWithBatchNorm(/*filter_size=*/1, /*filter_count=*/12);
}
1041
// Conv2D+FusedBatchNorm where the filter covers the whole image.
TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, ImageSizeConvolution) {
  this->VerifyConv2DWithBatchNorm(/*filter_size=*/TestFixture::kImageWidth,
                                  /*filter_count=*/12);
}
1047
// Conv2D+FusedBatchNorm with a 3x3 filter and 12 output channels.
TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, SpatialConvolution) {
  this->VerifyConv2DWithBatchNorm(/*filter_size=*/3, /*filter_count=*/12);
}
1053
1054 #ifndef INTEL_MKL
TYPED_TEST_P(FusedConv2DWithBatchNormOpTest,ExplicitPaddingConvolution)1055 TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, ExplicitPaddingConvolution) {
1056 const int filter_size = 3;
1057 const int filter_count = 12;
1058 this->VerifyConv2DWithBatchNorm(
1059 filter_size, filter_count,
1060 /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
1061 }
1062 #endif
1063
// Conv2D+FusedBatchNorm+{Activation} with a 1x1 filter, one run per
// supported activation.
TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, OneByOneConvolutionAndActivation) {
  for (const string& act : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBatchNormAndActivation(act, /*filter_size=*/1,
                                                 /*filter_count=*/12);
  }
}
1072
// Conv2D+FusedBatchNorm+{Activation} where the filter covers the whole image,
// one run per supported activation.
TYPED_TEST_P(FusedConv2DWithBatchNormOpTest,
             ImageSizeConvolutionAndActivation) {
  for (const string& act : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBatchNormAndActivation(
        act, /*filter_size=*/TestFixture::kImageWidth, /*filter_count=*/12);
  }
}
1082
// Conv2D+FusedBatchNorm+{Activation} with a 3x3 filter, one run per
// supported activation.
TYPED_TEST_P(FusedConv2DWithBatchNormOpTest, SpatialConvolutionAndActivation) {
  for (const string& act : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
    this->VerifyConv2DWithBatchNormAndActivation(act, /*filter_size=*/3,
                                                 /*filter_count=*/12);
  }
}
1091
1092 #ifndef INTEL_MKL
TYPED_TEST_P(FusedConv2DWithBatchNormOpTest,ExplicitPaddingConvolutionAndActivation)1093 TYPED_TEST_P(FusedConv2DWithBatchNormOpTest,
1094 ExplicitPaddingConvolutionAndActivation) {
1095 const int filter_size = 3;
1096 const int filter_count = 12;
1097 for (const string& activation : {"Relu", "Relu6", "Elu", "LeakyRelu"}) {
1098 this->VerifyConv2DWithBatchNormAndActivation(
1099 activation, filter_size, filter_count,
1100 /*explicit_paddings=*/{0, 0, 1, 2, 3, 4, 0, 0});
1101 }
1102 }
1103 #endif
1104
#ifndef INTEL_MKL
// Non-MKL builds register every case, including the ExplicitPadding* variants
// defined above under matching #ifndef INTEL_MKL guards.
REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBiasOpTest,          //
                            OneByOneConvolution,                //
                            ImageSizeConvolution,               //
                            SpatialConvolution,                 //
                            ExplicitPaddingConvolution,         //
                            OneByOneConvolutionAndActivation,   //
                            ImageSizeConvolutionAndActivation,  //
                            SpatialConvolutionAndActivation,    //
                            ExplicitPaddingConvolutionAndActivation);

REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBatchNormOpTest,     //
                            OneByOneConvolution,                //
                            ImageSizeConvolution,               //
                            SpatialConvolution,                 //
                            ExplicitPaddingConvolution,         //
                            OneByOneConvolutionAndActivation,   //
                            ImageSizeConvolutionAndActivation,  //
                            SpatialConvolutionAndActivation,    //
                            ExplicitPaddingConvolutionAndActivation);
#else
// MKL builds compile out the ExplicitPadding* cases, so only the remaining
// cases can be registered here.
REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBiasOpTest,          //
                            OneByOneConvolution,                //
                            ImageSizeConvolution,               //
                            SpatialConvolution,                 //
                            OneByOneConvolutionAndActivation,   //
                            ImageSizeConvolutionAndActivation,  //
                            SpatialConvolutionAndActivation);

REGISTER_TYPED_TEST_SUITE_P(FusedConv2DWithBatchNormOpTest,     //
                            OneByOneConvolution,                //
                            ImageSizeConvolution,               //
                            SpatialConvolution,                 //
                            OneByOneConvolutionAndActivation,   //
                            ImageSizeConvolutionAndActivation,  //
                            SpatialConvolutionAndActivation);
#endif
1142
// BiasAdd fusion is instantiated for both float and double.
using FusedBiasAddDataTypes = ::testing::Types<float, double>;
INSTANTIATE_TYPED_TEST_SUITE_P(Test, FusedConv2DWithBiasOpTest,
                               FusedBiasAddDataTypes);

// BatchNorm fusion is instantiated for float only — the default
// FusedBatchNorm kernel supports only floats for scale, mean and variance.
using FusedBatchNormDataTypes = ::testing::Types<float>;
INSTANTIATE_TYPED_TEST_SUITE_P(Test, FusedConv2DWithBatchNormOpTest,
                               FusedBatchNormDataTypes);
1150
1151 #endif // TENSORFLOW_USE_ROCM
1152 } // namespace tensorflow
1153