1# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Tests for Grappler LayoutOptimizer.""" 16 17import numpy as np 18 19from tensorflow.core.protobuf import config_pb2 20from tensorflow.core.protobuf import device_properties_pb2 21from tensorflow.core.protobuf import rewriter_config_pb2 22from tensorflow.core.protobuf import saver_pb2 23from tensorflow.python.client import session 24from tensorflow.python.framework import constant_op 25from tensorflow.python.framework import dtypes 26from tensorflow.python.framework import ops 27from tensorflow.python.framework import random_seed 28from tensorflow.python.framework import test_util 29from tensorflow.python.grappler import cluster as gcluster 30from tensorflow.python.grappler import tf_optimizer 31from tensorflow.python.layers import convolutional as conv_layers 32from tensorflow.python.ops import array_ops 33from tensorflow.python.ops import gen_array_ops 34from tensorflow.python.ops import gen_math_ops 35from tensorflow.python.ops import gen_nn_ops 36from tensorflow.python.ops import map_fn 37from tensorflow.python.ops import math_ops 38from tensorflow.python.ops import nn 39from tensorflow.python.ops import random_ops 40from tensorflow.python.ops import state_ops 41from tensorflow.python.ops import variables 42from tensorflow.python.platform import test 43from 
tensorflow.python.training import gradient_descent 44from tensorflow.python.training import saver as saver_lib 45 46 47def _weight(shape): 48 """Generates a weight of a given shape.""" 49 return random_ops.truncated_normal(shape, seed=0, stddev=0.1) 50 51 52def _bias(shape): 53 """Generates a bias of a given shape.""" 54 return constant_op.constant(0.1, shape=shape) 55 56 57def _conv2d(x, w): 58 """Returns a 2d convolution layer with full stride.""" 59 return nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME') 60 61 62def _max_pool_2x2(x): 63 """Downsamples a feature map by 2X.""" 64 return nn.max_pool( 65 x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 66 67 68# Taken from tensorflow/examples/tutorials/mnist/mnist_deep.py 69def _two_layer_model(x): 70 x_image = array_ops.reshape(x, [-1, 28, 28, 1]) 71 w_conv1 = _weight([5, 5, 1, 32]) 72 b_conv1 = _bias([32]) 73 h_conv1 = nn.relu(_conv2d(x_image, w_conv1) + b_conv1) 74 h_pool1 = _max_pool_2x2(h_conv1) 75 w_conv2 = _weight([5, 5, 32, 64]) 76 b_conv2 = _bias([64]) 77 h_conv2 = nn.relu(_conv2d(h_pool1, w_conv2) + b_conv2) 78 h_pool2 = _max_pool_2x2(h_conv2) 79 return h_pool2 80 81 82def _model_with_second_port(): 83 random_seed.set_random_seed(0) 84 x = random_ops.truncated_normal([2, 5, 5, 4], seed=0) 85 scale = constant_op.constant(0.1, shape=[4]) 86 offset = constant_op.constant(0.3, shape=[4]) 87 y, mean, _ = nn.fused_batch_norm(x, scale, offset) 88 mul = math_ops.add(y, mean) 89 output = array_ops.identity(mul) 90 return output 91 92 93def _model_with_branch(x): 94 x_image = array_ops.reshape(x, [-1, 28, 28, 1]) 95 w_conv1 = _weight([5, 5, 1, 32]) 96 w_conv2 = _weight([5, 5, 1, 32]) 97 c_conv1 = _conv2d(x_image, w_conv1) 98 c_conv2 = _conv2d(x_image, w_conv2) 99 add = math_ops.add(c_conv1, c_conv2) 100 return add 101 102 103def _model_with_vec_and_4d(x): 104 x_image = array_ops.reshape(x, [-1, 28, 28, 1]) 105 w_conv1 = _weight([5, 5, 1, 32]) 106 c_conv1 = _conv2d(x_image, w_conv1) 107 vector = 
constant_op.constant(6.4, shape=[32]) 108 add = math_ops.add(c_conv1, vector) 109 return add 110 111 112def _loop(): 113 random_seed.set_random_seed(0) 114 x1 = random_ops.truncated_normal([1, 784], seed=0) 115 x2 = random_ops.truncated_normal([1, 784], seed=0) 116 x3 = random_ops.truncated_normal([1, 784], seed=0) 117 x4 = random_ops.truncated_normal([1, 784], seed=0) 118 elems = (x1, x2, x3, x4) 119 outputs = map_fn.map_fn(_two_layer_model, elems, dtype=dtypes.float32) 120 return outputs 121 122 123def _loop_with_branch(): 124 random_seed.set_random_seed(0) 125 x1 = random_ops.truncated_normal([1, 784], seed=0) 126 x2 = random_ops.truncated_normal([1, 784], seed=0) 127 x3 = random_ops.truncated_normal([1, 784], seed=0) 128 x4 = random_ops.truncated_normal([1, 784], seed=0) 129 elems = (x1, x2, x3, x4) 130 outputs = map_fn.map_fn(_model_with_branch, elems, dtype=dtypes.float32) 131 return outputs 132 133 134def _loop_with_vec_and_4d(): 135 random_seed.set_random_seed(0) 136 x1 = random_ops.truncated_normal([1, 784], seed=0) 137 x2 = random_ops.truncated_normal([1, 784], seed=0) 138 x3 = random_ops.truncated_normal([1, 784], seed=0) 139 x4 = random_ops.truncated_normal([1, 784], seed=0) 140 elems = (x1, x2, x3, x4) 141 outputs = map_fn.map_fn(_model_with_vec_and_4d, elems, dtype=dtypes.float32) 142 return outputs 143 144 145def _get_config(layout_optimizer=True): 146 if layout_optimizer: 147 rewrite_options = rewriter_config_pb2.RewriterConfig( 148 layout_optimizer=rewriter_config_pb2.RewriterConfig.ON, 149 # do not remove duplicated nodes 150 arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF) 151 else: 152 rewrite_options = rewriter_config_pb2.RewriterConfig( 153 layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF, 154 # do not remove duplicated nodes 155 arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF) 156 rewrite_options.min_graph_nodes = -1 157 graph_options = config_pb2.GraphOptions( 158 rewrite_options=rewrite_options, 
build_cost_model=1) 159 config = config_pb2.ConfigProto(graph_options=graph_options) 160 config.graph_options.optimizer_options.opt_level = -1 161 return config 162 163 164def _simple_metagraph(depthwise=False): 165 random_seed.set_random_seed(0) 166 x = variables.Variable(random_ops.truncated_normal([1, 200, 200, 3], seed=0)) 167 conv = conv_layers.separable_conv2d if depthwise else conv_layers.conv2d 168 y = conv(x, 32, [3, 3]) 169 z = conv(y, 32, [3, 3]) 170 optimizer = gradient_descent.GradientDescentOptimizer(1e-4) 171 loss = math_ops.reduce_mean(z) 172 train_op = optimizer.minimize(loss) 173 graph = ops.get_default_graph() 174 graph.add_to_collection('train_op', train_op) 175 meta_graph = saver_lib.export_meta_graph(graph_def=graph.as_graph_def()) 176 return meta_graph 177 178 179def _get_cluster(): 180 named_device = device_properties_pb2.NamedDevice() 181 named_device.name = '/GPU:0' 182 named_device.properties.type = 'GPU' 183 named_device.properties.num_cores = 24 184 named_device.properties.frequency = 1000 185 named_device.properties.environment['architecture'] = '4' 186 cluster = gcluster.Cluster(devices=[named_device]) 187 return cluster 188 189 190def _is_transpose(node): 191 return node.endswith('TransposeNHWCToNCHW-LayoutOptimizer') or node.endswith( 192 'TransposeNCHWToNHWC-LayoutOptimizer') or node.endswith( 193 'TransposeNDHWCToNCDHW-LayoutOptimizer') or node.endswith( 194 'TransposeNCDHWToNDHWC-LayoutOptimizer') 195 196 197def _is_permute(node): 198 return node.endswith('VecPermuteNHWCToNCHW-LayoutOptimizer') or node.endswith( 199 'VecPermuteNCHWToNHWC-LayoutOptimizer') 200 201 202@test_util.for_all_test_methods(test_util.no_xla_auto_jit, 203 'Test does not apply in XLA setting') 204class LayoutOptimizerTest(test.TestCase): 205 """Tests the Grappler layout optimizer.""" 206 207 def _assert_trans_nchw_to_nhwc(self, name, nodes): 208 self.assertIn(name + '-TransposeNCHWToNHWC-LayoutOptimizer', nodes) 209 210 def _assert_trans_nhwc_to_nchw(self, 
name, nodes): 211 self.assertIn(name + '-TransposeNHWCToNCHW-LayoutOptimizer', nodes) 212 213 def _assert_trans_ncdhw_to_ndhwc(self, name, nodes): 214 self.assertIn(name + '-TransposeNCDHWToNDHWC-LayoutOptimizer', nodes) 215 216 def _assert_trans_ndhwc_to_ncdhw(self, name, nodes): 217 self.assertIn(name + '-TransposeNDHWCToNCDHW-LayoutOptimizer', nodes) 218 219 def _assert_map_nhwc_to_nchw(self, name, nodes): 220 self.assertIn(name + '-DimMapNHWCToNCHW-LayoutOptimizer', nodes) 221 222 def _assert_map_ndhwc_to_ncdhw(self, name, nodes): 223 self.assertIn(name + '-DataFormatDimMapNDHWCToNCDHW-LayoutOptimizer', nodes) 224 225 def _assert_vec_nchw_to_nhwc(self, name, nodes): 226 self.assertIn(name + '-VecPermuteNCHWToNHWC-LayoutOptimizer', nodes) 227 228 def _assert_vec_nhwc_to_nchw(self, name, nodes): 229 self.assertIn(name + '-VecPermuteNHWCToNCHW-LayoutOptimizer', nodes) 230 231 def _assert_vec_ncdhw_to_ndhwc(self, name, nodes): 232 self.assertIn(name + '-DataFormatVecPermuteNCDHWToNDHWC-LayoutOptimizer', 233 nodes) 234 235 def _assert_vec_ndhwc_to_ncdhw(self, name, nodes): 236 self.assertIn(name + '-DataFormatVecPermuteNDHWCToNCDHW-LayoutOptimizer', 237 nodes) 238 239 def _train(self, checkpoint_path, layout_optimizer=False, restore=False): 240 ops.reset_default_graph() 241 graph = ops.get_default_graph() 242 with session.Session( 243 config=_get_config(layout_optimizer), graph=graph) as sess: 244 batch = 2 245 height = 6 246 width = 7 247 input_channels = 3 248 shape = [batch, height, width, input_channels] 249 image = array_ops.placeholder(dtype='float32', shape=shape) 250 conv1 = conv_layers.conv2d(image, 32, [3, 3]) 251 conv2 = conv_layers.conv2d(conv1, 32, [3, 3]) 252 optimizer = gradient_descent.GradientDescentOptimizer(0.01) 253 loss = math_ops.reduce_mean(conv2) 254 train_op = optimizer.minimize(loss) 255 saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2) 256 257 if restore: 258 saver.restore(sess, checkpoint_path) 259 else: 260 
self.evaluate(variables.global_variables_initializer()) 261 262 np.random.seed(0) 263 for _ in range(2): 264 image_val = np.random.rand(*shape).astype(np.float32) 265 sess.run([loss, train_op], feed_dict={image: image_val}) 266 267 if restore: 268 all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) 269 all_vars_values = [var.eval(session=sess) for var in all_vars] 270 return all_vars_values 271 else: 272 saver.save(sess, checkpoint_path) 273 274 @test_util.deprecated_graph_mode_only 275 def testTwoConvLayers(self): 276 if test.is_gpu_available(cuda_only=True): 277 random_seed.set_random_seed(0) 278 x = random_ops.truncated_normal([1, 784], seed=0) 279 output = _two_layer_model(x) 280 281 with session.Session(config=_get_config(False)) as sess: 282 output_val_ref = self.evaluate(output) 283 284 with session.Session(config=_get_config()) as sess: 285 metadata = config_pb2.RunMetadata() 286 output_val = sess.run(output, run_metadata=metadata) 287 288 nodes = [] 289 num_transposes = 0 290 for node in metadata.cost_graph.node: 291 if _is_transpose(node.name): 292 num_transposes += 1 293 nodes.append(node.name) 294 295 # Four transposes were initially added in the Expand phase of 296 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
297 expected_num_transposes = 2 298 self.assertEqual(expected_num_transposes, num_transposes) 299 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 300 self._assert_trans_nchw_to_nhwc('Relu_1-0-0', nodes) 301 302 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 303 304 @test_util.deprecated_graph_mode_only 305 def testSplitWithNonConstAxis(self): 306 if test.is_gpu_available(cuda_only=True): 307 random_seed.set_random_seed(0) 308 x = random_ops.truncated_normal([1, 784], seed=0) 309 conv = _two_layer_model(x) 310 dim = array_ops.placeholder(dtype='int32') 311 split = array_ops.split(conv, 2, axis=dim) 312 scale = constant_op.constant(0.1, shape=[32]) 313 offset = constant_op.constant(0.3, shape=[32]) 314 bn0 = nn.fused_batch_norm(split[0], scale, offset) 315 bn1 = nn.fused_batch_norm(split[1], scale, offset) 316 add = bn0[0] + bn1[0] 317 output = array_ops.identity(add) 318 319 with session.Session(config=_get_config(False)) as sess: 320 output_val_ref = sess.run(output, feed_dict={dim: 3}) 321 322 with session.Session(config=_get_config()) as sess: 323 metadata = config_pb2.RunMetadata() 324 output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3}) 325 326 nodes = [] 327 num_transposes = 0 328 for node in metadata.cost_graph.node: 329 if _is_transpose(node.name): 330 num_transposes += 1 331 nodes.append(node.name) 332 333 expected_num_transposes = 2 334 self.assertEqual(expected_num_transposes, num_transposes) 335 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 336 self._assert_trans_nchw_to_nhwc('add_2-0-0', nodes) 337 self._assert_map_nhwc_to_nchw('split-0', nodes) 338 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 339 340 @test_util.deprecated_graph_mode_only 341 def testSplitVWithNonConstAxis(self): 342 if test.is_gpu_available(cuda_only=True): 343 random_seed.set_random_seed(0) 344 x = random_ops.truncated_normal([1, 784], seed=0) 345 conv = _two_layer_model(x) 346 dim = array_ops.placeholder(dtype='int32') 347 sizes = 
constant_op.constant([50, 10, 4], shape=[3]) 348 split = gen_array_ops.split_v( 349 value=conv, size_splits=sizes, axis=dim, num_split=3) 350 output = math_ops.reduce_sum(split[0]) 351 352 with session.Session(config=_get_config(False)) as sess: 353 output_val_ref = sess.run(output, feed_dict={dim: 3}) 354 355 with session.Session(config=_get_config()) as sess: 356 metadata = config_pb2.RunMetadata() 357 output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3}) 358 359 nodes = [] 360 num_transposes = 0 361 for node in metadata.cost_graph.node: 362 if _is_transpose(node.name): 363 num_transposes += 1 364 nodes.append(node.name) 365 366 # Four transposes were initially added in the Expand phase of 367 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 368 expected_num_transposes = 2 369 self.assertEqual(expected_num_transposes, num_transposes) 370 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 371 self._assert_trans_nchw_to_nhwc('SplitV-0-0', nodes) 372 self._assert_map_nhwc_to_nchw('SplitV-2', nodes) 373 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 374 375 @test_util.deprecated_graph_mode_only 376 def testPadWithConstPaddings(self): 377 if test.is_gpu_available(cuda_only=True): 378 random_seed.set_random_seed(0) 379 x = random_ops.truncated_normal([1, 784], seed=0) 380 conv = _two_layer_model(x) 381 paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]] 382 paddings = constant_op.constant( 383 paddings_val, dtype='int32', name='PaddingsConst') 384 pad = array_ops.pad(conv, paddings) 385 output = array_ops.identity(pad) 386 387 with session.Session(config=_get_config(False)) as sess: 388 output_val_ref = self.evaluate(output) 389 390 with session.Session(config=_get_config()) as sess: 391 metadata = config_pb2.RunMetadata() 392 output_val = sess.run(output, run_metadata=metadata) 393 394 nodes = [] 395 num_transposes = 0 396 for node in metadata.cost_graph.node: 397 if _is_transpose(node.name): 398 num_transposes += 1 
399 nodes.append(node.name) 400 401 # Four transposes were initially added in the Expand phase of 402 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 403 expected_num_transposes = 2 404 self.assertEqual(expected_num_transposes, num_transposes) 405 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 406 self._assert_trans_nchw_to_nhwc('Pad-0-0', nodes) 407 self.assertIn('Pad-1-LayoutOptimizer', nodes) 408 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 409 410 @test_util.deprecated_graph_mode_only 411 def testReduceSum(self): 412 if test.is_gpu_available(cuda_only=True): 413 random_seed.set_random_seed(0) 414 x = random_ops.truncated_normal([1, 784], seed=0) 415 conv = _two_layer_model(x) 416 reduce_sum = math_ops.reduce_sum(conv) 417 output = array_ops.identity(reduce_sum) 418 419 with session.Session(config=_get_config(False)) as sess: 420 output_val_ref = self.evaluate(output) 421 422 with session.Session(config=_get_config()) as sess: 423 metadata = config_pb2.RunMetadata() 424 output_val = sess.run(output, run_metadata=metadata) 425 426 nodes = [] 427 num_transposes = 0 428 for node in metadata.cost_graph.node: 429 if _is_transpose(node.name): 430 num_transposes += 1 431 nodes.append(node.name) 432 433 # Three transposes were initially added in the Expand phase of 434 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
435 expected_num_transposes = 1 436 self.assertEqual(expected_num_transposes, num_transposes) 437 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 438 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 439 440 @test_util.deprecated_graph_mode_only 441 def testCast(self): 442 if test.is_gpu_available(cuda_only=True): 443 random_seed.set_random_seed(0) 444 x = random_ops.truncated_normal([1, 784], seed=0) 445 conv = _two_layer_model(x) 446 cast = math_ops.cast(conv, dtype='bool') 447 output = array_ops.identity(cast) 448 449 with session.Session(config=_get_config(False)) as sess: 450 output_val_ref = self.evaluate(output) 451 452 with session.Session(config=_get_config()) as sess: 453 metadata = config_pb2.RunMetadata() 454 output_val = sess.run(output, run_metadata=metadata) 455 456 nodes = [] 457 num_transposes = 0 458 for node in metadata.cost_graph.node: 459 if _is_transpose(node.name): 460 num_transposes += 1 461 nodes.append(node.name) 462 463 # Four transposes were initially added in the Expand phase of 464 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
465 expected_num_transposes = 2 466 self.assertEqual(expected_num_transposes, num_transposes) 467 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 468 self._assert_trans_nchw_to_nhwc('Cast-0-0', nodes) 469 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 470 471 @test_util.deprecated_graph_mode_only 472 def testSqueeze(self): 473 if test.is_gpu_available(cuda_only=True): 474 random_seed.set_random_seed(0) 475 x = random_ops.truncated_normal([1, 784], seed=0) 476 conv = _two_layer_model(x) 477 reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2]) 478 squeeze = array_ops.squeeze(reduce_sum) 479 output = array_ops.identity(squeeze) 480 481 with session.Session(config=_get_config(False)) as sess: 482 output_val_ref = self.evaluate(output) 483 484 with session.Session(config=_get_config()) as sess: 485 metadata = config_pb2.RunMetadata() 486 output_val = sess.run(output, run_metadata=metadata) 487 488 nodes = [] 489 num_transposes = 0 490 for node in metadata.cost_graph.node: 491 if _is_transpose(node.name): 492 num_transposes += 1 493 nodes.append(node.name) 494 495 # Three transposes were initially added in the Expand phase of 496 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
497 expected_num_transposes = 1 498 self.assertEqual(expected_num_transposes, num_transposes) 499 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 500 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 501 502 @test_util.deprecated_graph_mode_only 503 def testSqueezeAlongHW(self): 504 if test.is_gpu_available(cuda_only=True): 505 random_seed.set_random_seed(0) 506 x = random_ops.truncated_normal([1, 784], seed=0) 507 conv = _two_layer_model(x) 508 reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2], keepdims=True) 509 squeeze = array_ops.squeeze(reduce_sum, axis=[1, 2]) 510 output = array_ops.identity(squeeze) 511 512 with session.Session(config=_get_config(False)) as sess: 513 output_val_ref = self.evaluate(output) 514 515 with session.Session(config=_get_config()) as sess: 516 metadata = config_pb2.RunMetadata() 517 output_val = sess.run(output, run_metadata=metadata) 518 519 nodes = [] 520 num_transposes = 0 521 for node in metadata.cost_graph.node: 522 if _is_transpose(node.name): 523 num_transposes += 1 524 nodes.append(node.name) 525 526 # Three transposes were initially added in the Expand phase of 527 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
528 expected_num_transposes = 1 529 self.assertEqual(expected_num_transposes, num_transposes) 530 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 531 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 532 533 @test_util.deprecated_graph_mode_only 534 def testSqueezeAlongNHW(self): 535 if test.is_gpu_available(cuda_only=True): 536 random_seed.set_random_seed(0) 537 x = random_ops.truncated_normal([1, 784], seed=0) 538 conv = _two_layer_model(x) 539 reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2], keepdims=True) 540 squeeze = array_ops.squeeze(reduce_sum, axis=[0, 1, 2]) 541 output = array_ops.identity(squeeze) 542 543 with session.Session(config=_get_config(False)) as sess: 544 output_val_ref = self.evaluate(output) 545 546 with session.Session(config=_get_config()) as sess: 547 metadata = config_pb2.RunMetadata() 548 output_val = sess.run(output, run_metadata=metadata) 549 550 nodes = [] 551 num_transposes = 0 552 for node in metadata.cost_graph.node: 553 if _is_transpose(node.name): 554 num_transposes += 1 555 nodes.append(node.name) 556 557 # Three transposes were initially added in the Expand phase of 558 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
559 expected_num_transposes = 1 560 self.assertEqual(expected_num_transposes, num_transposes) 561 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 562 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 563 564 @test_util.deprecated_graph_mode_only 565 def testReduceSumAlongHWC(self): 566 if test.is_gpu_available(cuda_only=True): 567 random_seed.set_random_seed(0) 568 x = random_ops.truncated_normal([1, 784], seed=0) 569 conv = _two_layer_model(x) 570 reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2, 3]) 571 output = array_ops.identity(reduce_sum) 572 573 with session.Session(config=_get_config(False)) as sess: 574 output_val_ref = self.evaluate(output) 575 576 with session.Session(config=_get_config()) as sess: 577 metadata = config_pb2.RunMetadata() 578 output_val = sess.run(output, run_metadata=metadata) 579 580 nodes = [] 581 num_transposes = 0 582 for node in metadata.cost_graph.node: 583 if _is_transpose(node.name): 584 num_transposes += 1 585 nodes.append(node.name) 586 587 # Three transposes were initially added in the Expand phase of 588 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
589 expected_num_transposes = 1 590 self.assertEqual(expected_num_transposes, num_transposes) 591 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 592 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 593 594 @test_util.deprecated_graph_mode_only 595 def testReduceSumAlongNHW(self): 596 if test.is_gpu_available(cuda_only=True): 597 random_seed.set_random_seed(0) 598 x = random_ops.truncated_normal([1, 784], seed=0) 599 conv = _two_layer_model(x) 600 reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2]) 601 output = array_ops.identity(reduce_sum) 602 603 with session.Session(config=_get_config(False)) as sess: 604 output_val_ref = self.evaluate(output) 605 606 with session.Session(config=_get_config()) as sess: 607 metadata = config_pb2.RunMetadata() 608 output_val = sess.run(output, run_metadata=metadata) 609 610 nodes = [] 611 num_transposes = 0 612 for node in metadata.cost_graph.node: 613 if _is_transpose(node.name): 614 num_transposes += 1 615 nodes.append(node.name) 616 617 # Three transposes were initially added in the Expand phase of 618 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
619 expected_num_transposes = 1 620 self.assertEqual(expected_num_transposes, num_transposes) 621 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 622 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 623 624 @test_util.deprecated_graph_mode_only 625 def testReduceSumAlongC(self): 626 if test.is_gpu_available(cuda_only=True): 627 random_seed.set_random_seed(0) 628 x = random_ops.truncated_normal([1, 784], seed=0) 629 conv = _two_layer_model(x) 630 reduce_sum = math_ops.reduce_sum(conv, axis=[3]) 631 output = array_ops.identity(reduce_sum) 632 633 with session.Session(config=_get_config(False)) as sess: 634 output_val_ref = self.evaluate(output) 635 636 with session.Session(config=_get_config()) as sess: 637 metadata = config_pb2.RunMetadata() 638 output_val = sess.run(output, run_metadata=metadata) 639 640 nodes = [] 641 num_transposes = 0 642 for node in metadata.cost_graph.node: 643 if _is_transpose(node.name): 644 num_transposes += 1 645 nodes.append(node.name) 646 647 # Three transposes were initially added in the Expand phase of 648 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
649 expected_num_transposes = 1 650 self.assertEqual(expected_num_transposes, num_transposes) 651 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 652 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 653 654 @test_util.deprecated_graph_mode_only 655 def testReduceSumAlongCKeepDims(self): 656 if test.is_gpu_available(cuda_only=True): 657 random_seed.set_random_seed(0) 658 x = random_ops.truncated_normal([1, 784], seed=0) 659 conv = _two_layer_model(x) 660 reduce_sum = math_ops.reduce_sum(conv, axis=[3], keepdims=True) 661 output = array_ops.identity(reduce_sum) 662 663 with session.Session(config=_get_config(False)) as sess: 664 output_val_ref = self.evaluate(output) 665 666 with session.Session(config=_get_config()) as sess: 667 metadata = config_pb2.RunMetadata() 668 output_val = sess.run(output, run_metadata=metadata) 669 670 nodes = [] 671 num_transposes = 0 672 for node in metadata.cost_graph.node: 673 if _is_transpose(node.name): 674 num_transposes += 1 675 nodes.append(node.name) 676 677 # Four transposes were initially added in the Expand phase of 678 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
679 expected_num_transposes = 2 680 self.assertEqual(expected_num_transposes, num_transposes) 681 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 682 self._assert_trans_nchw_to_nhwc('Sum-0-0', nodes) 683 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 684 685 @test_util.deprecated_graph_mode_only 686 def testReduceSumAlongHKeepDims(self): 687 if test.is_gpu_available(cuda_only=True): 688 random_seed.set_random_seed(0) 689 x = random_ops.truncated_normal([1, 784], seed=0) 690 conv = _two_layer_model(x) 691 reduce_sum = math_ops.reduce_sum(conv, axis=[2], keepdims=True) 692 output = array_ops.identity(reduce_sum) 693 694 with session.Session(config=_get_config(False)) as sess: 695 output_val_ref = self.evaluate(output) 696 697 with session.Session(config=_get_config()) as sess: 698 metadata = config_pb2.RunMetadata() 699 output_val = sess.run(output, run_metadata=metadata) 700 701 nodes = [] 702 num_transposes = 0 703 for node in metadata.cost_graph.node: 704 if _is_transpose(node.name): 705 num_transposes += 1 706 nodes.append(node.name) 707 708 # Four transposes were initially added in the Expand phase of 709 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
710 expected_num_transposes = 2 711 self.assertEqual(expected_num_transposes, num_transposes) 712 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 713 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 714 715 @test_util.deprecated_graph_mode_only 716 def testReduceSumAlongWCKeepDims(self): 717 if test.is_gpu_available(cuda_only=True): 718 random_seed.set_random_seed(0) 719 x = random_ops.truncated_normal([1, 784], seed=0) 720 conv = _two_layer_model(x) 721 reduce_sum = math_ops.reduce_sum(conv, axis=[2, 3], keepdims=True) 722 output = array_ops.identity(reduce_sum) 723 724 with session.Session(config=_get_config(False)) as sess: 725 output_val_ref = self.evaluate(output) 726 727 with session.Session(config=_get_config()) as sess: 728 metadata = config_pb2.RunMetadata() 729 output_val = sess.run(output, run_metadata=metadata) 730 731 nodes = [] 732 num_transposes = 0 733 for node in metadata.cost_graph.node: 734 if _is_transpose(node.name): 735 num_transposes += 1 736 nodes.append(node.name) 737 738 # Four transposes were initially added in the Expand phase of 739 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
740 expected_num_transposes = 2 741 self.assertEqual(expected_num_transposes, num_transposes) 742 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 743 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 744 745 @test_util.deprecated_graph_mode_only 746 def testConcatWithControlDependency(self): 747 if test.is_gpu_available(cuda_only=True): 748 random_seed.set_random_seed(0) 749 x = random_ops.truncated_normal([1, 784], seed=0) 750 conv = _two_layer_model(x) 751 axis = constant_op.constant(3) 752 var = variables.Variable(3) 753 assign = state_ops.assign(var, 6) 754 with ops.control_dependencies([assign]): 755 concat = array_ops.concat([conv, conv], axis) 756 output = array_ops.identity(concat) 757 758 with session.Session(config=_get_config(False)) as sess: 759 output_val_ref = self.evaluate(output) 760 761 with session.Session(config=_get_config()) as sess: 762 metadata = config_pb2.RunMetadata() 763 output_val = sess.run(output, run_metadata=metadata) 764 765 nodes = [] 766 num_transposes = 0 767 for node in metadata.cost_graph.node: 768 if _is_transpose(node.name): 769 num_transposes += 1 770 nodes.append(node.name) 771 772 # Four transposes were initially added in the Expand phase of 773 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
      # End of the preceding concat test: exactly two transposes should remain
      # and the concat axis constant should have been rewritten in place.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('concat-0-0', nodes)
      self.assertIn('concat-2-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testConcatWithControlDependencyFor5DTensor(self):
    """Concat of 5D Conv3D outputs under a control dependency.

    Checks that the layout optimizer converts Conv3D to NCDHW, inserts a
    dim-map for the concat axis, and preserves the numerical result.
    """
    if not test.is_gpu_available(cuda_only=True):
      self.skipTest('GPU required')
    random_seed.set_random_seed(0)
    x = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0)
    w = random_ops.truncated_normal([2, 2, 2, 1, 2], seed=0)
    strides = [1, 1, 1, 1, 1]
    y = gen_nn_ops.conv3d(x, w, strides, 'SAME')
    axis = constant_op.constant(4)
    var = variables.Variable(3)
    assign = state_ops.assign(var, 6)
    with ops.control_dependencies([assign]):
      concat = array_ops.concat([y, y], axis)
    output = array_ops.identity(concat)

    # Reference value with the layout optimizer disabled.
    with session.Session(config=_get_config(False)) as sess:
      output_val_ref = self.evaluate(output)

    # Optimized run; the cost graph in `metadata` records the rewritten nodes.
    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      output_val = sess.run(output, run_metadata=metadata)

    nodes = []
    num_transposes = 0
    for node in metadata.cost_graph.node:
      if _is_transpose(node.name):
        num_transposes += 1
      nodes.append(node.name)

    # Four transposes were initially added in the Expand phase of
    # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    expected_num_transposes = 2
    self.assertEqual(expected_num_transposes, num_transposes)
    self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes)
    self._assert_trans_ncdhw_to_ndhwc('concat-0-0', nodes)
    self._assert_map_ndhwc_to_ncdhw('concat-2', nodes)
    self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testFill(self):
    """Fill whose shape operand comes from a layout-converted conv output."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = array_ops.placeholder(dtype='float32')
      conv = _two_layer_model(x)
      shape = array_ops.shape(conv)
      scalar = array_ops.constant(5.7)
      fill = array_ops.fill(shape, scalar)
      output = array_ops.identity(fill)

      x_val = [3.4] * 784
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={x: x_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                x: x_val
            })

      nodes = []
      num_transposes = 0
      num_vec_permute = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        if _is_permute(node.name):
          num_vec_permute += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      # Two vector permute nodes were initially added in the Expand phase of
      # LayoutOptimizer; they cancelled out each other in the Collapse phase.
      expected_vec_permute = 0
      self.assertEqual(expected_vec_permute, num_vec_permute)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Fill-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testTile(self):
    """Tile whose multiples operand is fed at run time (non-const)."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      multiple = array_ops.placeholder(dtype='int32')
      tile = array_ops.tile(conv, multiple)
      output = array_ops.identity(tile)

      multiple_val = [2, 3, 4, 1]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={multiple: multiple_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                multiple: multiple_val
            })

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      # End of testTile: the fed multiples vector is permuted NHWC->NCHW at
      # run time rather than rewritten as a constant.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Tile-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('Tile-1', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testReverseWithConstDims(self):
    """ReverseV2 with constant dims: dims constant is rewritten in place."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      dims = constant_op.constant([3, 1], name='DimsConst')
      reverse = array_ops.reverse(conv, dims)
      output = array_ops.identity(reverse)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
      self.assertIn('ReverseV2-1-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testReverseWithNonConstDims(self):
    """ReverseV2 with fed dims: a run-time dim-map node must be inserted."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      dims = array_ops.placeholder(dtype='int32')
      reverse = array_ops.reverse(conv, dims)
      output = array_ops.identity(reverse)

      dims_val = [2, 3]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={dims: dims_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                dims: dims_val
            })

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
      self._assert_map_nhwc_to_nchw('ReverseV2-1', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testSelectOp(self):
    """Select with all-4D operands is converted to NCHW."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      add = math_ops.add(conv, conv)
      mean = math_ops.reduce_mean(conv)
      condition = math_ops.less(conv, mean)
      select = gen_math_ops.select(condition, conv, add)
      output = array_ops.identity(select)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Select-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testSelectOpConditionUnknownShape(self):
    """Select whose condition has an unknown shape at graph-build time."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      add = math_ops.add(conv, conv)
      condition = array_ops.placeholder(dtype='bool')
      select = gen_math_ops.select(condition, conv, add)
      output = array_ops.identity(select)

      condition_val = np.zeros((1, 7, 7, 64))
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={condition: condition_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={condition: condition_val})

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # One more transpose than in testSelectOp remains in the optimized
      # graph when the condition shape is unknown.
      expected_num_transposes = 3
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testSelectOpScalarCondition(self):
    """Select with a scalar (constant) condition is still converted."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      add = math_ops.add(conv, conv)
      condition = constant_op.constant(True)
      select = gen_math_ops.select(condition, conv, add)
      output = array_ops.identity(select)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = self.evaluate(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Select-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testPadWithNonConstPaddings(self):
    """Pad whose paddings operand is fed at run time (non-const)."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      paddings = array_ops.placeholder(dtype='int32')
      pad = array_ops.pad(conv, paddings)
      output = array_ops.identity(pad)

      paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={paddings: paddings_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                paddings: paddings_val
            })

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      # End of testPadWithNonConstPaddings: the fed paddings matrix is
      # permuted NHWC->NCHW at run time.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Pad-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('Pad-1', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testMaxPoolV2(self):
    """MaxPoolV2 with constant ksize and run-time-fed strides."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
      strides = array_ops.placeholder(dtype='int32', shape=[4])
      max_pool = gen_nn_ops.max_pool_v2(conv, ksize, strides, 'VALID')
      output = array_ops.identity(max_pool)

      strides_val = [1, 3, 2, 1]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={strides: strides_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                strides: strides_val
            })

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('MaxPoolV2-0-0', nodes)
      # Fed strides are permuted at run time; the constant ksize is rewritten.
      self._assert_vec_nhwc_to_nchw('MaxPoolV2-2', nodes)
      self.assertIn('MaxPoolV2-1-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testMaxPoolGradV2(self):
    """MaxPoolGradV2 with constant ksize and run-time-fed strides."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
      strides = array_ops.placeholder(dtype='int32', shape=[4])
      max_pool_grad = gen_nn_ops.max_pool_grad_v2(conv, conv, conv, ksize,
                                                  strides, 'VALID')
      output = array_ops.identity(max_pool_grad)

      strides_val = [1, 3, 2, 1]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={strides: strides_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                strides: strides_val
            })

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('MaxPoolGradV2-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('MaxPoolGradV2-4', nodes)
      self.assertIn('MaxPoolGradV2-3-LayoutOptimizer', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testLeakyRelu(self):
    """LeakyRelu following Conv2D stays in NCHW until the graph output."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([4, 14, 14, 1], seed=0)
      w = random_ops.truncated_normal([2, 2, 1, 2], seed=0)
      y = nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
      y = nn.leaky_relu(y, alpha=0.2)
      output = array_ops.identity(y)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nchw_to_nhwc('LeakyRelu-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testLeakyReluGrad(self):
    """LeakyReluGrad: its second (feature) input needs its own transpose."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([4, 14, 14, 1], seed=0)
      w = random_ops.truncated_normal([2, 2, 1, 1], seed=0)
      y = nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
      y = gen_nn_ops.leaky_relu_grad(y, x, alpha=0.2)
      output = array_ops.identity(y)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 3
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('LeakyReluGrad-1', nodes)
      self._assert_trans_nchw_to_nhwc('LeakyReluGrad-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testLeakyReluGradFor5DTensors(self):
    """LeakyReluGrad on 5D tensors uses NDHWC<->NCDHW transposes."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 4, 2, 3, 3], seed=0)
      w = random_ops.truncated_normal([2, 2, 2, 3, 3], seed=0)
      y = gen_nn_ops.conv3d(x, w, [1, 1, 1, 1, 1], 'SAME')
      y = gen_nn_ops.leaky_relu_grad(y, x, alpha=0.2)
      output = array_ops.identity(y)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      expected_num_transposes = 3
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_ndhwc_to_ncdhw('LeakyReluGrad-1', nodes)
      self._assert_trans_ncdhw_to_ndhwc('LeakyReluGrad-0-0', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testReduceOpsFor5DTensors(self):
    """reduce_mean over a 5D Conv3D output with keepdims=True."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 4, 2, 3, 3], seed=0)
      w = random_ops.truncated_normal([2, 2, 2, 3, 3], seed=0)
      conv3d = gen_nn_ops.conv3d(x, w, [1, 1, 1, 1, 1], 'SAME')
      y = math_ops.reduce_mean(conv3d, [0, 1, 2, 3], keepdims=True)
      output = array_ops.identity(y)

      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output)

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(output, run_metadata=metadata)

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # The reduce op Mean needs to dim map the input reduce index to NCDHW.
      # Then, the output needs to be tranposed back to NDHWC.
1293 expected_num_transposes = 2 1294 self.assertEqual(expected_num_transposes, num_transposes) 1295 self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes) 1296 self._assert_map_ndhwc_to_ncdhw('Mean-1', nodes) 1297 self._assert_trans_ncdhw_to_ndhwc('Mean-0-0', nodes) 1298 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1299 1300 @test_util.deprecated_graph_mode_only 1301 def testBinaryOpsFor5DTensors(self): 1302 if test.is_gpu_available(cuda_only=True): 1303 random_seed.set_random_seed(0) 1304 x = random_ops.truncated_normal([1, 4, 2, 3, 3], seed=0) 1305 w = random_ops.truncated_normal([2, 2, 2, 3, 3], seed=0) 1306 mean = random_ops.truncated_normal([1, 1, 1, 1, 3], seed=0) 1307 variance = random_ops.truncated_normal([1, 1, 1, 1, 3], seed=0) 1308 gamma = random_ops.truncated_normal([1, 1, 1, 1, 3], seed=0) 1309 beta = random_ops.truncated_normal([1, 1, 1, 1, 3], seed=0) 1310 conv3d = gen_nn_ops.conv3d(x, w, [1, 1, 1, 1, 1], 'SAME') 1311 y = nn.batch_normalization( 1312 conv3d, 1313 mean=mean, 1314 variance=variance, 1315 scale=gamma, 1316 offset=beta, 1317 variance_epsilon=0.001) 1318 output = array_ops.identity(y) 1319 1320 with session.Session(config=_get_config(False)) as sess: 1321 output_val_ref = sess.run(output) 1322 1323 with session.Session(config=_get_config()) as sess: 1324 metadata = config_pb2.RunMetadata() 1325 output_val = sess.run(output, run_metadata=metadata) 1326 1327 nodes = [] 1328 num_transposes = 0 1329 for node in metadata.cost_graph.node: 1330 if _is_transpose(node.name): 1331 num_transposes += 1 1332 nodes.append(node.name) 1333 1334 # The binary ops mul_1 and add_1 in batch norm need to transpose one of 1335 # the two inputs to NCDHW. The other input has already been tranposed via 1336 # Conv3D. 
1337 expected_num_transposes = 4 1338 self.assertEqual(expected_num_transposes, num_transposes) 1339 self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes) 1340 self._assert_trans_ndhwc_to_ncdhw('batchnorm/mul_1-1', nodes) 1341 self._assert_trans_ndhwc_to_ncdhw('batchnorm/add_1-1', nodes) 1342 self._assert_trans_ncdhw_to_ndhwc('batchnorm/add_1-0-0', nodes) 1343 1344 @test_util.deprecated_graph_mode_only 1345 def testBatchNorm3D(self): 1346 if test.is_gpu_available(cuda_only=True): 1347 random_seed.set_random_seed(0) 1348 x_3d = random_ops.truncated_normal([1, 4, 2, 3, 3], seed=0) 1349 filters = random_ops.truncated_normal([2, 2, 2, 3, 3], seed=0) 1350 strides_val = [1, 1, 1, 1, 1] 1351 scale = constant_op.constant(0.1, shape=[3]) 1352 offset = constant_op.constant(0.3, shape=[3]) 1353 conv3d = gen_nn_ops.conv3d(x_3d, filters, strides_val, 'SAME') 1354 y, _, _ = nn.fused_batch_norm(conv3d, scale, offset, data_format='NDHWC') 1355 output = array_ops.identity(y) 1356 1357 with session.Session(config=_get_config(False)) as sess: 1358 output_val_ref = sess.run(output) 1359 1360 with session.Session(config=_get_config()) as sess: 1361 metadata = config_pb2.RunMetadata() 1362 output_val = sess.run(output, run_metadata=metadata) 1363 1364 nodes = [] 1365 num_transposes = 0 1366 for node in metadata.cost_graph.node: 1367 if _is_transpose(node.name): 1368 num_transposes += 1 1369 nodes.append(node.name) 1370 1371 expected_num_transposes = 2 1372 self.assertEqual(expected_num_transposes, num_transposes) 1373 self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes) 1374 self._assert_trans_ncdhw_to_ndhwc('FusedBatchNormV3-0-0', nodes) 1375 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1376 1377 @test_util.deprecated_graph_mode_only 1378 def testBatchNormGrad3D(self): 1379 if test.is_gpu_available(cuda_only=True): 1380 random_seed.set_random_seed(0) 1381 x_3d = random_ops.truncated_normal([1, 4, 2, 3, 3], seed=0) 1382 filters = random_ops.truncated_normal([2, 2, 2, 3, 3], 
seed=0) 1383 strides_val = [1, 1, 1, 1, 1] 1384 scale = constant_op.constant(0.1, shape=[3]) 1385 offset = constant_op.constant(0.3, shape=[3]) 1386 mean = constant_op.constant(0.1, shape=[3]) 1387 variance = constant_op.constant(0.3, shape=[3]) 1388 conv3d = gen_nn_ops.conv3d(x_3d, filters, strides_val, 'SAME') 1389 y, running_mean, running_var, r0, r1, r2 = gen_nn_ops.fused_batch_norm_v3( 1390 conv3d, 1391 scale, 1392 offset, 1393 mean, 1394 variance, 1395 epsilon=1.001e-5, 1396 exponential_avg_factor=1.0, 1397 data_format='NDHWC', 1398 is_training=True, 1399 name='batch_norm') 1400 dx, dscale, doffset, _, _ = gen_nn_ops.fused_batch_norm_grad_v3( 1401 y, 1402 x_3d, 1403 scale, 1404 r0, 1405 r1, 1406 r2, 1407 epsilon=1.001e-5, 1408 data_format='NDHWC', 1409 is_training=True) 1410 output = array_ops.identity(dx) 1411 1412 with session.Session(config=_get_config(False)) as sess: 1413 output_val_ref = sess.run(output) 1414 1415 with session.Session(config=_get_config()) as sess: 1416 metadata = config_pb2.RunMetadata() 1417 output_val = sess.run(output, run_metadata=metadata) 1418 1419 nodes = [] 1420 num_transposes = 0 1421 for node in metadata.cost_graph.node: 1422 if _is_transpose(node.name): 1423 num_transposes += 1 1424 nodes.append(node.name) 1425 1426 expected_num_transposes = 3 1427 self.assertEqual(expected_num_transposes, num_transposes) 1428 self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes) 1429 self._assert_trans_ndhwc_to_ncdhw('FusedBatchNormGradV3-1', nodes) 1430 self._assert_trans_ncdhw_to_ndhwc('FusedBatchNormGradV3-0-0', nodes) 1431 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1432 1433 @test_util.deprecated_graph_mode_only 1434 def testConv3D(self): 1435 if not test.is_gpu_available(cuda_only=True): 1436 self.skipTest('GPU required') 1437 random_seed.set_random_seed(0) 1438 x = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0) 1439 w = random_ops.truncated_normal([2, 2, 2, 1, 2], seed=0) 1440 strides = [1, 1, 1, 1, 1] 1441 y = 
gen_nn_ops.conv3d(x, w, strides, 'SAME') 1442 output = array_ops.identity(y) 1443 1444 with session.Session(config=_get_config(False)) as sess: 1445 output_val_ref = sess.run(output) 1446 1447 with session.Session(config=_get_config()) as sess: 1448 metadata = config_pb2.RunMetadata() 1449 output_val = sess.run(output, run_metadata=metadata) 1450 1451 nodes = [] 1452 num_transposes = 0 1453 for node in metadata.cost_graph.node: 1454 if _is_transpose(node.name): 1455 num_transposes += 1 1456 nodes.append(node.name) 1457 1458 expected_num_transposes = 2 1459 self.assertEqual(expected_num_transposes, num_transposes) 1460 self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes) 1461 self._assert_trans_ncdhw_to_ndhwc('Conv3D-0-0', nodes) 1462 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1463 1464 @test_util.deprecated_graph_mode_only 1465 def testConv3DBackpropInput(self): 1466 if not test.is_gpu_available(cuda_only=True): 1467 self.skipTest('GPU required') 1468 random_seed.set_random_seed(0) 1469 dy = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0) 1470 w = random_ops.truncated_normal([2, 2, 2, 1, 1], seed=0) 1471 strides = [1, 1, 1, 1, 1] 1472 x_shape = array_ops.shape(dy) 1473 dx = gen_nn_ops.conv3d_backprop_input_v2(x_shape, w, dy, strides, 'SAME') 1474 output = array_ops.identity(dx) 1475 1476 with session.Session(config=_get_config(False)) as sess: 1477 output_val_ref = sess.run(output) 1478 1479 with session.Session(config=_get_config()) as sess: 1480 metadata = config_pb2.RunMetadata() 1481 output_val = sess.run(output, run_metadata=metadata) 1482 1483 nodes = [] 1484 num_transposes = 0 1485 for node in metadata.cost_graph.node: 1486 if _is_transpose(node.name): 1487 num_transposes += 1 1488 nodes.append(node.name) 1489 1490 expected_num_transposes = 2 1491 self.assertEqual(expected_num_transposes, num_transposes) 1492 self._assert_vec_ndhwc_to_ncdhw('Conv3DBackpropInputV2-0', nodes) 1493 
    self._assert_trans_ndhwc_to_ncdhw('Conv3DBackpropInputV2-2', nodes)
    self._assert_trans_ncdhw_to_ndhwc('Conv3DBackpropInputV2-0-0', nodes)
    self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testConv3DBackpropFilter(self):
    """Conv3DBackpropFilterV2: both 5D inputs transposed; output is a filter."""
    if not test.is_gpu_available(cuda_only=True):
      self.skipTest('GPU required')
    random_seed.set_random_seed(0)
    x = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0)
    dy = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0)
    strides = [1, 1, 1, 1, 1]
    w_shape = constant_op.constant([2, 2, 2, 1, 1], shape=[5])
    dw = gen_nn_ops.conv3d_backprop_filter_v2(x, w_shape, dy, strides, 'SAME')
    output = array_ops.identity(dw)

    with session.Session(config=_get_config(False)) as sess:
      output_val_ref = sess.run(output)

    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      output_val = sess.run(output, run_metadata=metadata)

    nodes = []
    num_transposes = 0
    for node in metadata.cost_graph.node:
      if _is_transpose(node.name):
        num_transposes += 1
      nodes.append(node.name)

    expected_num_transposes = 2
    self.assertEqual(expected_num_transposes, num_transposes)
    self._assert_trans_ndhwc_to_ncdhw('Conv3DBackpropFilterV2-0', nodes)
    self._assert_trans_ndhwc_to_ncdhw('Conv3DBackpropFilterV2-2', nodes)
    self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testBiasAddFor5DTensor(self):
    """BiasAdd applied to a 5D Conv3D output."""
    if not test.is_gpu_available(cuda_only=True):
      self.skipTest('GPU required')
    random_seed.set_random_seed(0)
    x = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0)
    w = random_ops.truncated_normal([2, 2, 2, 1, 2], seed=0)
    b = random_ops.truncated_normal([2], seed=0)
    strides = [1, 1, 1, 1, 1]
    y = gen_nn_ops.conv3d(x, w, strides, 'SAME')
    y = gen_nn_ops.bias_add(y, b, 'NHWC')
    output = array_ops.identity(y)

    with session.Session(config=_get_config(False)) as sess:
      output_val_ref = sess.run(output)

    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      output_val = sess.run(output, run_metadata=metadata)

    nodes = []
    num_transposes = 0
    for node in metadata.cost_graph.node:
      if _is_transpose(node.name):
        num_transposes += 1
      nodes.append(node.name)

    expected_num_transposes = 2
    self.assertEqual(expected_num_transposes, num_transposes)
    self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes)
    self._assert_trans_ncdhw_to_ndhwc('BiasAdd-0-0', nodes)
    self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testBiasAddGradFor5DTensor(self):
    """BiasAddGrad fed by a 5D Conv3DBackpropInputV2 output."""
    if not test.is_gpu_available(cuda_only=True):
      self.skipTest('GPU required')
    random_seed.set_random_seed(0)
    dy = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0)
    w = random_ops.truncated_normal([2, 2, 2, 1, 1], seed=0)
    strides = [1, 1, 1, 1, 1]
    dy_shape = array_ops.shape(dy)
    dx = gen_nn_ops.conv3d_backprop_input_v2(dy_shape, w, dy, strides, 'SAME')
    db = gen_nn_ops.bias_add_grad(dx, 'NHWC')
    output = array_ops.identity(db)

    with session.Session(config=_get_config(False)) as sess:
      output_val_ref = sess.run(output)

    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      output_val = sess.run(output, run_metadata=metadata)

    nodes = []
    num_transposes = 0
    for node in metadata.cost_graph.node:
      if _is_transpose(node.name):
        num_transposes += 1
      nodes.append(node.name)

    # The output of Conv3DBackpropInputV2 won't be converted back to NDHWC
    # because of the BiasAddGrad.
    expected_num_transposes = 1
    self.assertEqual(expected_num_transposes, num_transposes)
    self._assert_vec_ndhwc_to_ncdhw('Conv3DBackpropInputV2-0', nodes)
    self._assert_trans_ndhwc_to_ncdhw('Conv3DBackpropInputV2-2', nodes)
    self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testSliceWithNonConstAxis(self):
    """Slice whose size operand is fed at run time."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      size = array_ops.placeholder(dtype='int32')
      s = array_ops.slice(conv, [0, 0, 0, 0], size)
      output = array_ops.identity(s)

      size_val = [1, 2, 3, 4]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={size: size_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                size: size_val
            })

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
      expected_num_transposes = 2
      self.assertEqual(expected_num_transposes, num_transposes)
      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
      self._assert_trans_nchw_to_nhwc('Slice-0-0', nodes)
      self._assert_vec_nhwc_to_nchw('Slice-2', nodes)
      self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testSliceWithNonConstAxisFor5DTensor(self):
    """5D Slice whose size operand is fed at run time."""
    if not test.is_gpu_available(cuda_only=True):
      self.skipTest('GPU required')
    random_seed.set_random_seed(0)
    x = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0)
    w = random_ops.truncated_normal([2, 2, 2, 1, 2], seed=0)
    strides = [1, 1, 1, 1, 1]
    y = gen_nn_ops.conv3d(x, w, strides, 'SAME')
    size = array_ops.placeholder(dtype='int32')
    s = array_ops.slice(y, [0, 0, 0, 0, 0], size)
    output = array_ops.identity(s)

    size_val = [1, 1, 2, 2, 1]
    with session.Session(config=_get_config(False)) as sess:
      output_val_ref = sess.run(output, feed_dict={size: size_val})

    with session.Session(config=_get_config()) as sess:
      metadata = config_pb2.RunMetadata()
      output_val = sess.run(
          output, run_metadata=metadata, feed_dict={size: size_val})

    nodes = []
    num_transposes = 0
    for node in metadata.cost_graph.node:
      if _is_transpose(node.name):
        num_transposes += 1
      nodes.append(node.name)

    # Four transposes were initially added in the Expand phase of
    # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
    expected_num_transposes = 2
    self.assertEqual(expected_num_transposes, num_transposes)
    self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes)
    self._assert_trans_ncdhw_to_ndhwc('Slice-0-0', nodes)
    self._assert_vec_ndhwc_to_ncdhw('Slice-2', nodes)
    self.assertAllClose(output_val_ref, output_val, atol=1e-3)

  @test_util.deprecated_graph_mode_only
  def testStridedSliceWithNonConstAxis(self):
    """StridedSlice whose end operand is fed at run time."""
    if test.is_gpu_available(cuda_only=True):
      random_seed.set_random_seed(0)
      x = random_ops.truncated_normal([1, 784], seed=0)
      conv = _two_layer_model(x)
      end = array_ops.placeholder(dtype='int32')
      s = array_ops.strided_slice(conv, [0, 0, 0, 0], end, strides=[1, 2, 3, 1])
      output = array_ops.identity(s)

      end_val = [1, 2, 3, 4]
      with session.Session(config=_get_config(False)) as sess:
        output_val_ref = sess.run(output, feed_dict={end: end_val})

      with session.Session(config=_get_config()) as sess:
        metadata = config_pb2.RunMetadata()
        output_val = sess.run(
            output, run_metadata=metadata, feed_dict={
                end: end_val
            })

      nodes = []
      num_transposes = 0
      for node in metadata.cost_graph.node:
        if _is_transpose(node.name):
          num_transposes += 1
        nodes.append(node.name)

      # Four transposes were initially added in the Expand phase of
      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1702 expected_num_transposes = 2 1703 self.assertEqual(expected_num_transposes, num_transposes) 1704 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1705 self._assert_trans_nchw_to_nhwc('StridedSlice-0-0', nodes) 1706 self._assert_vec_nhwc_to_nchw('StridedSlice-2', nodes) 1707 self.assertIn('StridedSlice-1-LayoutOptimizer', nodes) 1708 self.assertIn('StridedSlice-3-LayoutOptimizer', nodes) 1709 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1710 1711 @test_util.deprecated_graph_mode_only 1712 def testStridedSliceWithMask1011(self): 1713 if test.is_gpu_available(cuda_only=True): 1714 random_seed.set_random_seed(0) 1715 x = random_ops.truncated_normal([1, 784], seed=0) 1716 conv = _two_layer_model(x) 1717 # This will generate a StridedSlice op with begin mask and 1718 # end mask 11(1011). 1719 s = conv[:, :, 1:-1, :] 1720 output = array_ops.identity(s) 1721 1722 with session.Session(config=_get_config(False)) as sess: 1723 output_val_ref = self.evaluate(output) 1724 1725 with session.Session(config=_get_config()) as sess: 1726 metadata = config_pb2.RunMetadata() 1727 output_val = sess.run(output, run_metadata=metadata) 1728 1729 nodes = [] 1730 num_transposes = 0 1731 for node in metadata.cost_graph.node: 1732 if _is_transpose(node.name): 1733 num_transposes += 1 1734 nodes.append(node.name) 1735 1736 # Four transposes were initially added in the Expand phase of 1737 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
1738 expected_num_transposes = 2 1739 self.assertEqual(expected_num_transposes, num_transposes) 1740 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1741 self._assert_trans_nchw_to_nhwc('strided_slice-0-0', nodes) 1742 self.assertIn('strided_slice-1-LayoutOptimizer', nodes) 1743 self.assertIn('strided_slice-2-LayoutOptimizer', nodes) 1744 self.assertIn('strided_slice-3-LayoutOptimizer', nodes) 1745 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1746 1747 @test_util.deprecated_graph_mode_only 1748 def testStridedSliceWithMask0111(self): 1749 if test.is_gpu_available(cuda_only=True): 1750 random_seed.set_random_seed(0) 1751 x = random_ops.truncated_normal([1, 784], seed=0) 1752 conv = _two_layer_model(x) 1753 # This will generate a StridedSlice op with begin mask and 1754 # end mask 7(0111). 1755 s = conv[:, :, :, 1:-1] 1756 output = array_ops.identity(s) 1757 1758 with session.Session(config=_get_config(False)) as sess: 1759 output_val_ref = self.evaluate(output) 1760 1761 with session.Session(config=_get_config()) as sess: 1762 metadata = config_pb2.RunMetadata() 1763 output_val = sess.run(output, run_metadata=metadata) 1764 1765 nodes = [] 1766 num_transposes = 0 1767 for node in metadata.cost_graph.node: 1768 if _is_transpose(node.name): 1769 num_transposes += 1 1770 nodes.append(node.name) 1771 1772 # Four transposes were initially added in the Expand phase of 1773 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
1774 expected_num_transposes = 2 1775 self.assertEqual(expected_num_transposes, num_transposes) 1776 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1777 self._assert_trans_nchw_to_nhwc('strided_slice-0-0', nodes) 1778 self.assertIn('strided_slice-1-LayoutOptimizer', nodes) 1779 self.assertIn('strided_slice-2-LayoutOptimizer', nodes) 1780 self.assertIn('strided_slice-3-LayoutOptimizer', nodes) 1781 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1782 1783 @test_util.deprecated_graph_mode_only 1784 def testStridedSliceGradWithNonConstAxis(self): 1785 if test.is_gpu_available(cuda_only=True): 1786 random_seed.set_random_seed(0) 1787 x = random_ops.truncated_normal([1, 784], seed=0) 1788 conv = _two_layer_model(x) 1789 end = array_ops.placeholder(dtype='int32') 1790 shape = array_ops.shape(conv) 1791 end_val = [1, 2, 3, 4] 1792 s = array_ops.strided_slice( 1793 conv, [0, 0, 0, 0], end_val, strides=[1, 2, 3, 1]) 1794 s_grad = array_ops.strided_slice_grad(shape, [0, 0, 0, 0], end, 1795 [1, 2, 3, 1], s) 1796 output = array_ops.identity(s_grad) 1797 1798 with session.Session(config=_get_config(False)) as sess: 1799 output_val_ref = sess.run(output, feed_dict={end: end_val}) 1800 1801 with session.Session(config=_get_config()) as sess: 1802 metadata = config_pb2.RunMetadata() 1803 output_val = sess.run( 1804 output, run_metadata=metadata, feed_dict={ 1805 end: end_val 1806 }) 1807 1808 nodes = [] 1809 num_transposes = 0 1810 for node in metadata.cost_graph.node: 1811 if _is_transpose(node.name): 1812 num_transposes += 1 1813 nodes.append(node.name) 1814 1815 # Four transposes were initially added in the Expand phase of 1816 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 
1817 expected_num_transposes = 2 1818 self.assertEqual(expected_num_transposes, num_transposes) 1819 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1820 self._assert_trans_nchw_to_nhwc('StridedSliceGrad-0-0', nodes) 1821 self._assert_vec_nhwc_to_nchw('StridedSliceGrad-2', nodes) 1822 self.assertIn('StridedSlice-1-LayoutOptimizer', nodes) 1823 self.assertIn('StridedSlice-2-LayoutOptimizer', nodes) 1824 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1825 1826 @test_util.deprecated_graph_mode_only 1827 def testShapeN(self): 1828 if test.is_gpu_available(cuda_only=True): 1829 x = array_ops.placeholder(dtype='float32') 1830 conv = _two_layer_model(x) 1831 shapen = array_ops.shape_n([conv, conv]) 1832 output = math_ops.add(shapen[0], shapen[1]) 1833 1834 x_val = [1.7] * 784 1835 with session.Session(config=_get_config(False)) as sess: 1836 output_val_ref = sess.run(output, feed_dict={x: x_val}) 1837 1838 with session.Session(config=_get_config()) as sess: 1839 metadata = config_pb2.RunMetadata() 1840 output_val = sess.run( 1841 output, run_metadata=metadata, feed_dict={ 1842 x: x_val 1843 }) 1844 1845 nodes = [] 1846 num_transposes = 0 1847 for node in metadata.cost_graph.node: 1848 if _is_transpose(node.name): 1849 num_transposes += 1 1850 nodes.append(node.name) 1851 1852 expected_num_transposes = 1 1853 self.assertEqual(expected_num_transposes, num_transposes) 1854 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1855 self._assert_vec_nchw_to_nhwc('ShapeN-0-0', nodes) 1856 self.assertAllEqual(output_val_ref, output_val) 1857 1858 @test_util.deprecated_graph_mode_only 1859 def testShapeNFor5DTensor(self): 1860 if not test.is_gpu_available(cuda_only=True): 1861 self.skipTest('GPU required') 1862 h = array_ops.placeholder(dtype='float32') 1863 x = array_ops.reshape(h, [-1, 2, 14, 14, 1]) 1864 w = random_ops.truncated_normal([2, 2, 2, 1, 2], seed=0) 1865 strides = [1, 1, 1, 1, 1] 1866 y = gen_nn_ops.conv3d(x, w, strides, 'SAME') 1867 shapen = 
array_ops.shape_n([y, y]) 1868 output = math_ops.add(shapen[0], shapen[1]) 1869 1870 x_val = [1.7] * 784 1871 with session.Session(config=_get_config(False)) as sess: 1872 output_val_ref = sess.run(output, feed_dict={h: x_val}) 1873 1874 with session.Session(config=_get_config()) as sess: 1875 metadata = config_pb2.RunMetadata() 1876 output_val = sess.run(output, run_metadata=metadata, feed_dict={h: x_val}) 1877 1878 nodes = [] 1879 num_transposes = 0 1880 for node in metadata.cost_graph.node: 1881 if _is_transpose(node.name): 1882 num_transposes += 1 1883 nodes.append(node.name) 1884 1885 expected_num_transposes = 1 1886 self.assertEqual(expected_num_transposes, num_transposes) 1887 self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes) 1888 self._assert_vec_ncdhw_to_ndhwc('ShapeN-0-0', nodes) 1889 self._assert_vec_ncdhw_to_ndhwc('ShapeN-1-0', nodes) 1890 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1891 1892 @test_util.deprecated_graph_mode_only 1893 def testIdentityNFor4DAnd5DTensors(self): 1894 if not test.is_gpu_available(cuda_only=True): 1895 self.skipTest('GPU required') 1896 h = array_ops.placeholder(dtype='float32') 1897 x = array_ops.reshape(h, [-1, 2, 14, 14, 1]) 1898 w = random_ops.truncated_normal([2, 2, 2, 1, 4], seed=0) 1899 strides = [1, 1, 1, 1, 1] 1900 y = gen_nn_ops.conv3d(x, w, strides, 'SAME') 1901 x1 = array_ops.reshape(h, [-1, 784]) 1902 y1 = _two_layer_model(x1) 1903 outputs = array_ops.identity_n([y1, y]) 1904 new_x0 = array_ops.reshape(outputs[0], [-1, 2, 14, 14, 1]) 1905 new_x1 = array_ops.reshape(outputs[1], [-1, 2, 14, 14, 1]) 1906 output = math_ops.add(new_x0, new_x1) 1907 1908 x_val = [1.7] * 784 1909 with session.Session(config=_get_config(False)) as sess: 1910 output_val_ref = sess.run(output, feed_dict={h: x_val}) 1911 1912 with session.Session(config=_get_config()) as sess: 1913 metadata = config_pb2.RunMetadata() 1914 output_val = sess.run(output, run_metadata=metadata, feed_dict={h: x_val}) 1915 1916 nodes = [] 1917 
num_transposes = 0 1918 for node in metadata.cost_graph.node: 1919 if _is_transpose(node.name): 1920 num_transposes += 1 1921 nodes.append(node.name) 1922 1923 expected_num_transposes = 4 1924 self.assertEqual(expected_num_transposes, num_transposes) 1925 self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes) 1926 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1927 self._assert_trans_ncdhw_to_ndhwc('IdentityN-1-0', nodes) 1928 self._assert_trans_nchw_to_nhwc('IdentityN-0-0', nodes) 1929 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1930 1931 @test_util.deprecated_graph_mode_only 1932 def testShapeNFollowedByNotConvertibleNodeReshape(self): 1933 if test.is_gpu_available(cuda_only=True): 1934 x = array_ops.placeholder(dtype='float32') 1935 conv = _two_layer_model(x) 1936 conv_reshape = array_ops.reshape(conv, [1, 1, 1, -1]) 1937 shapen = array_ops.shape_n([conv, conv_reshape]) 1938 shape = array_ops.identity(shapen[1]) 1939 ones = array_ops.ones(shape) 1940 output = math_ops.add_n([conv_reshape, ones]) 1941 1942 x_val = [1.7] * 784 1943 with session.Session(config=_get_config(False)) as sess: 1944 output_val_ref = sess.run(output, feed_dict={x: x_val}) 1945 1946 with session.Session(config=_get_config()) as sess: 1947 metadata = config_pb2.RunMetadata() 1948 output_val = sess.run( 1949 output, run_metadata=metadata, feed_dict={x: x_val}) 1950 1951 nodes = [] 1952 num_transposes = 0 1953 for node in metadata.cost_graph.node: 1954 if _is_transpose(node.name): 1955 num_transposes += 1 1956 nodes.append(node.name) 1957 1958 expected_num_transposes = 2 1959 self.assertEqual(expected_num_transposes, num_transposes) 1960 self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) 1961 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1962 1963 @test_util.deprecated_graph_mode_only 1964 def testLoop(self): 1965 if test.is_gpu_available(cuda_only=True): 1966 output = _loop() 1967 1968 with session.Session(config=_get_config(False)) as sess: 1969 output_val_ref 
= self.evaluate(output) 1970 1971 with session.Session(config=_get_config()) as sess: 1972 metadata = config_pb2.RunMetadata() 1973 output_val = sess.run(output, run_metadata=metadata) 1974 1975 nodes = [] 1976 num_transposes = 0 1977 for node in metadata.cost_graph.node: 1978 if _is_transpose(node.name): 1979 num_transposes += 1 1980 nodes.append(node.name) 1981 1982 # Four transposes were initially added in the Expand phase of 1983 # LayoutOptimizer; two of them are cancelled out in the Collapse phase. 1984 expected_num_transposes = 2 1985 self.assertEqual(expected_num_transposes, num_transposes) 1986 self.assertEqual(expected_num_transposes, num_transposes) 1987 self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes) 1988 self._assert_trans_nchw_to_nhwc('map/while/MaxPool_1-0-2', nodes) 1989 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 1990 1991 @test_util.deprecated_graph_mode_only 1992 def testLoopWithBranch(self): 1993 if test.is_gpu_available(cuda_only=True): 1994 output = _loop_with_branch() 1995 1996 with session.Session(config=_get_config(False)) as sess: 1997 output_val_ref = self.evaluate(output) 1998 1999 with session.Session(config=_get_config()) as sess: 2000 metadata = config_pb2.RunMetadata() 2001 output_val = sess.run(output, run_metadata=metadata) 2002 2003 nodes = [] 2004 num_transposes = 0 2005 for node in metadata.cost_graph.node: 2006 if _is_transpose(node.name): 2007 num_transposes += 1 2008 nodes.append(node.name) 2009 2010 expected_num_transposes = 3 2011 self.assertEqual(expected_num_transposes, num_transposes) 2012 self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes) 2013 self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes) 2014 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 2015 2016 @test_util.deprecated_graph_mode_only 2017 def testLoopWithVecAnd4D(self): 2018 if test.is_gpu_available(cuda_only=True): 2019 output = _loop_with_vec_and_4d() 2020 2021 with 
session.Session(config=_get_config(False)) as sess: 2022 output_val_ref = self.evaluate(output) 2023 2024 with session.Session(config=_get_config()) as sess: 2025 metadata = config_pb2.RunMetadata() 2026 output_val = sess.run(output, run_metadata=metadata) 2027 2028 nodes = [] 2029 num_transposes = 0 2030 for node in metadata.cost_graph.node: 2031 if _is_transpose(node.name): 2032 num_transposes += 1 2033 nodes.append(node.name) 2034 2035 expected_num_transposes = 2 2036 self.assertEqual(expected_num_transposes, num_transposes) 2037 self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes) 2038 self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes) 2039 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 2040 2041 @test_util.deprecated_graph_mode_only 2042 def testBinaryOpSecondPort(self): 2043 if test.is_gpu_available(cuda_only=True): 2044 output = _model_with_second_port() 2045 2046 with session.Session(config=_get_config(False)) as sess: 2047 output_val_ref = self.evaluate(output) 2048 2049 with session.Session(config=_get_config()) as sess: 2050 metadata = config_pb2.RunMetadata() 2051 output_val = sess.run(output, run_metadata=metadata) 2052 2053 nodes = [] 2054 num_transposes = 0 2055 for node in metadata.cost_graph.node: 2056 if _is_transpose(node.name): 2057 num_transposes += 1 2058 nodes.append(node.name) 2059 2060 expected_num_transposes = 2 2061 self.assertEqual(expected_num_transposes, num_transposes) 2062 self._assert_trans_nhwc_to_nchw('FusedBatchNormV3-0', nodes) 2063 self._assert_trans_nchw_to_nhwc('Add-0-0', nodes) 2064 self.assertAllClose(output_val_ref, output_val, atol=1e-3) 2065 2066 @test_util.deprecated_graph_mode_only 2067 def testGradient(self): 2068 meta_graph = _simple_metagraph() 2069 config = config_pb2.ConfigProto() 2070 config.graph_options.rewrite_options.CopyFrom( 2071 rewriter_config_pb2.RewriterConfig( 2072 layout_optimizer=rewriter_config_pb2.RewriterConfig.ON, 2073 min_graph_nodes=-1)) 2074 optimized_graph = 
tf_optimizer.OptimizeGraph( 2075 config, meta_graph, cluster=_get_cluster()) 2076 2077 found = 0 2078 for node in optimized_graph.node: 2079 if node.op in ['Conv2D', 'Conv2DBackpropFilter', 'Conv2DBackpropInput']: 2080 found += 1 2081 self.assertEqual(node.attr['data_format'].s, b'NCHW') 2082 self.assertEqual(found, 5) 2083 2084 @test_util.deprecated_graph_mode_only 2085 def testDepthwise(self): 2086 meta_graph = _simple_metagraph(depthwise=True) 2087 config = config_pb2.ConfigProto() 2088 config.graph_options.rewrite_options.CopyFrom( 2089 rewriter_config_pb2.RewriterConfig( 2090 layout_optimizer=rewriter_config_pb2.RewriterConfig.ON, 2091 min_graph_nodes=-1)) 2092 optimized_graph = tf_optimizer.OptimizeGraph( 2093 config, meta_graph, cluster=_get_cluster()) 2094 2095 found = 0 2096 for node in optimized_graph.node: 2097 if node.op in [ 2098 'DepthwiseConv2dNative', 'DepthwiseConv2dNativeBackpropFilter', 2099 'DepthwiseConv2dNativeBackpropInput' 2100 ]: 2101 found += 1 2102 self.assertEqual(node.attr['data_format'].s, b'NCHW') 2103 self.assertEqual(found, 6) 2104 2105 def testCheckpointCompatibility(self): 2106 if not test.is_gpu_available(cuda_only=True): 2107 self.skipTest('GPU required') 2108 2109 checkpoint_path = self.get_temp_dir() 2110 self._train(checkpoint_path) 2111 vars_expected = self._train(checkpoint_path, restore=True) 2112 vars_layout_optimized = self._train( 2113 checkpoint_path, restore=True, layout_optimizer=True) 2114 2115 for var_expected, var_layout_optimized in zip(vars_expected, 2116 vars_layout_optimized): 2117 self.assertAllClose(var_expected, var_layout_optimized, atol=1e-6) 2118 2119 2120if __name__ == '__main__': 2121 test.main() 2122