xref: /aosp_15_r20/external/tensorflow/tensorflow/python/grappler/layout_optimizer_test.py (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Tests for Grappler LayoutOptimizer."""
16
17import numpy as np
18
19from tensorflow.core.protobuf import config_pb2
20from tensorflow.core.protobuf import device_properties_pb2
21from tensorflow.core.protobuf import rewriter_config_pb2
22from tensorflow.core.protobuf import saver_pb2
23from tensorflow.python.client import session
24from tensorflow.python.framework import constant_op
25from tensorflow.python.framework import dtypes
26from tensorflow.python.framework import ops
27from tensorflow.python.framework import random_seed
28from tensorflow.python.framework import test_util
29from tensorflow.python.grappler import cluster as gcluster
30from tensorflow.python.grappler import tf_optimizer
31from tensorflow.python.layers import convolutional as conv_layers
32from tensorflow.python.ops import array_ops
33from tensorflow.python.ops import gen_array_ops
34from tensorflow.python.ops import gen_math_ops
35from tensorflow.python.ops import gen_nn_ops
36from tensorflow.python.ops import map_fn
37from tensorflow.python.ops import math_ops
38from tensorflow.python.ops import nn
39from tensorflow.python.ops import random_ops
40from tensorflow.python.ops import state_ops
41from tensorflow.python.ops import variables
42from tensorflow.python.platform import test
43from tensorflow.python.training import gradient_descent
44from tensorflow.python.training import saver as saver_lib
45
46
def _weight(shape):
  """Creates a truncated-normal weight tensor of the requested shape."""
  return random_ops.truncated_normal(shape, stddev=0.1, seed=0)
50
51
def _bias(shape):
  """Creates a bias tensor of the requested shape, filled with 0.1."""
  bias = constant_op.constant(0.1, shape=shape)
  return bias
55
56
def _conv2d(x, w):
  """Applies a stride-1, SAME-padded 2-D convolution of filter `w` over `x`."""
  strides = [1, 1, 1, 1]
  return nn.conv2d(x, w, strides=strides, padding='SAME')
60
61
def _max_pool_2x2(x):
  """Halves the spatial resolution of `x` with a 2x2 max pool."""
  window = [1, 2, 2, 1]
  return nn.max_pool(x, ksize=window, strides=window, padding='SAME')
66
67
# Taken from tensorflow/examples/tutorials/mnist/mnist_deep.py
def _two_layer_model(x):
  """Builds the two conv/pool layer MNIST tower used throughout these tests.

  Op-creation order matches the original tutorial code so that the
  auto-generated node names ('Conv2D', 'Relu_1', ...) that the tests
  assert on stay stable.
  """
  images = array_ops.reshape(x, [-1, 28, 28, 1])
  weights1 = _weight([5, 5, 1, 32])
  biases1 = _bias([32])
  relu1 = nn.relu(_conv2d(images, weights1) + biases1)
  pool1 = _max_pool_2x2(relu1)
  weights2 = _weight([5, 5, 32, 64])
  biases2 = _bias([64])
  relu2 = nn.relu(_conv2d(pool1, weights2) + biases2)
  return _max_pool_2x2(relu2)
80
81
def _model_with_second_port():
  """Builds a fused batch-norm graph that also consumes the op's 2nd output."""
  random_seed.set_random_seed(0)
  x = random_ops.truncated_normal([2, 5, 5, 4], seed=0)
  scale = constant_op.constant(0.1, shape=[4])
  offset = constant_op.constant(0.3, shape=[4])
  normed, batch_mean, _ = nn.fused_batch_norm(x, scale, offset)
  combined = math_ops.add(normed, batch_mean)
  return array_ops.identity(combined)
91
92
def _model_with_branch(x):
  """Builds two parallel conv branches over `x` and sums their outputs."""
  images = array_ops.reshape(x, [-1, 28, 28, 1])
  filters_a = _weight([5, 5, 1, 32])
  filters_b = _weight([5, 5, 1, 32])
  branch_a = _conv2d(images, filters_a)
  branch_b = _conv2d(images, filters_b)
  return math_ops.add(branch_a, branch_b)
101
102
def _model_with_vec_and_4d(x):
  """Builds a conv whose 4-D output is added to a rank-1 constant."""
  images = array_ops.reshape(x, [-1, 28, 28, 1])
  filters = _weight([5, 5, 1, 32])
  conv_out = _conv2d(images, filters)
  bias_vec = constant_op.constant(6.4, shape=[32])
  return math_ops.add(conv_out, bias_vec)
110
111
def _loop():
  """Maps the two-layer model over four identical random inputs via map_fn."""
  random_seed.set_random_seed(0)
  elems = tuple(
      random_ops.truncated_normal([1, 784], seed=0) for _ in range(4))
  return map_fn.map_fn(_two_layer_model, elems, dtype=dtypes.float32)
121
122
def _loop_with_branch():
  """Maps the two-branch model over four identical random inputs."""
  random_seed.set_random_seed(0)
  elems = tuple(
      random_ops.truncated_normal([1, 784], seed=0) for _ in range(4))
  return map_fn.map_fn(_model_with_branch, elems, dtype=dtypes.float32)
132
133
def _loop_with_vec_and_4d():
  """Maps the vector-plus-4D model over four identical random inputs."""
  random_seed.set_random_seed(0)
  elems = tuple(
      random_ops.truncated_normal([1, 784], seed=0) for _ in range(4))
  return map_fn.map_fn(_model_with_vec_and_4d, elems, dtype=dtypes.float32)
143
144
def _get_config(layout_optimizer=True):
  """Builds a session config with the layout optimizer toggled on or off.

  The original code duplicated the entire RewriterConfig construction in
  both branches; only the `layout_optimizer` field differed, so the toggle
  is now computed first and the proto built once.

  Args:
    layout_optimizer: whether to enable the Grappler layout optimizer.

  Returns:
    A `config_pb2.ConfigProto` with a cost model enabled and all other
    graph-level optimizations (arithmetic optimizer, opt_level) disabled so
    that node names in the resulting cost graph are predictable.
  """
  if layout_optimizer:
    toggle = rewriter_config_pb2.RewriterConfig.ON
  else:
    toggle = rewriter_config_pb2.RewriterConfig.OFF
  rewrite_options = rewriter_config_pb2.RewriterConfig(
      layout_optimizer=toggle,
      # do not remove duplicated nodes
      arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF)
  rewrite_options.min_graph_nodes = -1
  graph_options = config_pb2.GraphOptions(
      rewrite_options=rewrite_options, build_cost_model=1)
  config = config_pb2.ConfigProto(graph_options=graph_options)
  config.graph_options.optimizer_options.opt_level = -1
  return config
162
163
def _simple_metagraph(depthwise=False):
  """Exports a MetaGraphDef for a small two-conv training graph.

  Args:
    depthwise: if True, build with separable (depthwise) convolutions.

  Returns:
    A MetaGraphDef for the default graph, with the train op recorded in the
    'train_op' collection.
  """
  random_seed.set_random_seed(0)
  x = variables.Variable(random_ops.truncated_normal([1, 200, 200, 3], seed=0))
  if depthwise:
    conv = conv_layers.separable_conv2d
  else:
    conv = conv_layers.conv2d
  y = conv(x, 32, [3, 3])
  z = conv(y, 32, [3, 3])
  loss = math_ops.reduce_mean(z)
  train_op = gradient_descent.GradientDescentOptimizer(1e-4).minimize(loss)
  graph = ops.get_default_graph()
  graph.add_to_collection('train_op', train_op)
  return saver_lib.export_meta_graph(graph_def=graph.as_graph_def())
177
178
def _get_cluster():
  """Creates a grappler Cluster describing a single virtual GPU device."""
  gpu = device_properties_pb2.NamedDevice()
  gpu.name = '/GPU:0'
  gpu.properties.type = 'GPU'
  gpu.properties.num_cores = 24
  gpu.properties.frequency = 1000
  gpu.properties.environment['architecture'] = '4'
  return gcluster.Cluster(devices=[gpu])
188
189
190def _is_transpose(node):
191  return node.endswith('TransposeNHWCToNCHW-LayoutOptimizer') or node.endswith(
192      'TransposeNCHWToNHWC-LayoutOptimizer') or node.endswith(
193          'TransposeNDHWCToNCDHW-LayoutOptimizer') or node.endswith(
194              'TransposeNCDHWToNDHWC-LayoutOptimizer')
195
196
197def _is_permute(node):
198  return node.endswith('VecPermuteNHWCToNCHW-LayoutOptimizer') or node.endswith(
199      'VecPermuteNCHWToNHWC-LayoutOptimizer')
200
201
202@test_util.for_all_test_methods(test_util.no_xla_auto_jit,
203                                'Test does not apply in XLA setting')
204class LayoutOptimizerTest(test.TestCase):
205  """Tests the Grappler layout optimizer."""
206
207  def _assert_trans_nchw_to_nhwc(self, name, nodes):
208    self.assertIn(name + '-TransposeNCHWToNHWC-LayoutOptimizer', nodes)
209
210  def _assert_trans_nhwc_to_nchw(self, name, nodes):
211    self.assertIn(name + '-TransposeNHWCToNCHW-LayoutOptimizer', nodes)
212
213  def _assert_trans_ncdhw_to_ndhwc(self, name, nodes):
214    self.assertIn(name + '-TransposeNCDHWToNDHWC-LayoutOptimizer', nodes)
215
216  def _assert_trans_ndhwc_to_ncdhw(self, name, nodes):
217    self.assertIn(name + '-TransposeNDHWCToNCDHW-LayoutOptimizer', nodes)
218
219  def _assert_map_nhwc_to_nchw(self, name, nodes):
220    self.assertIn(name + '-DimMapNHWCToNCHW-LayoutOptimizer', nodes)
221
222  def _assert_map_ndhwc_to_ncdhw(self, name, nodes):
223    self.assertIn(name + '-DataFormatDimMapNDHWCToNCDHW-LayoutOptimizer', nodes)
224
225  def _assert_vec_nchw_to_nhwc(self, name, nodes):
226    self.assertIn(name + '-VecPermuteNCHWToNHWC-LayoutOptimizer', nodes)
227
228  def _assert_vec_nhwc_to_nchw(self, name, nodes):
229    self.assertIn(name + '-VecPermuteNHWCToNCHW-LayoutOptimizer', nodes)
230
231  def _assert_vec_ncdhw_to_ndhwc(self, name, nodes):
232    self.assertIn(name + '-DataFormatVecPermuteNCDHWToNDHWC-LayoutOptimizer',
233                  nodes)
234
235  def _assert_vec_ndhwc_to_ncdhw(self, name, nodes):
236    self.assertIn(name + '-DataFormatVecPermuteNDHWCToNCDHW-LayoutOptimizer',
237                  nodes)
238
  def _train(self, checkpoint_path, layout_optimizer=False, restore=False):
    """Runs two training steps of a small convnet, saving or restoring.

    Builds a fresh two-conv-layer graph each call, then either initializes
    variables and saves a V2 checkpoint, or restores from one and reports
    the post-training variable values. Used to check that checkpoints are
    interchangeable with and without the layout optimizer.

    Args:
      checkpoint_path: path passed to `Saver.save` / `Saver.restore`.
      layout_optimizer: whether the Grappler layout optimizer is enabled in
        the session config.
      restore: if True, restore variables from `checkpoint_path` before
        training and return their values afterwards; if False, initialize
        variables and save a checkpoint after training.

    Returns:
      When `restore` is True, a list with the evaluated value of every
      GLOBAL_VARIABLES collection entry; otherwise None (the checkpoint is
      written as a side effect).
    """
    ops.reset_default_graph()
    graph = ops.get_default_graph()
    with session.Session(
        config=_get_config(layout_optimizer), graph=graph) as sess:
      batch = 2
      height = 6
      width = 7
      input_channels = 3
      shape = [batch, height, width, input_channels]
      image = array_ops.placeholder(dtype='float32', shape=shape)
      conv1 = conv_layers.conv2d(image, 32, [3, 3])
      conv2 = conv_layers.conv2d(conv1, 32, [3, 3])
      optimizer = gradient_descent.GradientDescentOptimizer(0.01)
      loss = math_ops.reduce_mean(conv2)
      train_op = optimizer.minimize(loss)
      saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)

      if restore:
        saver.restore(sess, checkpoint_path)
      else:
        self.evaluate(variables.global_variables_initializer())

      # Fixed seed so the save run and the restore run train on the exact
      # same input batches.
      np.random.seed(0)
      for _ in range(2):
        image_val = np.random.rand(*shape).astype(np.float32)
        sess.run([loss, train_op], feed_dict={image: image_val})

      if restore:
        all_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        all_vars_values = [var.eval(session=sess) for var in all_vars]
        return all_vars_values
      else:
        saver.save(sess, checkpoint_path)
273
274  @test_util.deprecated_graph_mode_only
275  def testTwoConvLayers(self):
276    if test.is_gpu_available(cuda_only=True):
277      random_seed.set_random_seed(0)
278      x = random_ops.truncated_normal([1, 784], seed=0)
279      output = _two_layer_model(x)
280
281      with session.Session(config=_get_config(False)) as sess:
282        output_val_ref = self.evaluate(output)
283
284      with session.Session(config=_get_config()) as sess:
285        metadata = config_pb2.RunMetadata()
286        output_val = sess.run(output, run_metadata=metadata)
287
288      nodes = []
289      num_transposes = 0
290      for node in metadata.cost_graph.node:
291        if _is_transpose(node.name):
292          num_transposes += 1
293        nodes.append(node.name)
294
295      # Four transposes were initially added in the Expand phase of
296      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
297      expected_num_transposes = 2
298      self.assertEqual(expected_num_transposes, num_transposes)
299      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
300      self._assert_trans_nchw_to_nhwc('Relu_1-0-0', nodes)
301
302      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
303
304  @test_util.deprecated_graph_mode_only
305  def testSplitWithNonConstAxis(self):
306    if test.is_gpu_available(cuda_only=True):
307      random_seed.set_random_seed(0)
308      x = random_ops.truncated_normal([1, 784], seed=0)
309      conv = _two_layer_model(x)
310      dim = array_ops.placeholder(dtype='int32')
311      split = array_ops.split(conv, 2, axis=dim)
312      scale = constant_op.constant(0.1, shape=[32])
313      offset = constant_op.constant(0.3, shape=[32])
314      bn0 = nn.fused_batch_norm(split[0], scale, offset)
315      bn1 = nn.fused_batch_norm(split[1], scale, offset)
316      add = bn0[0] + bn1[0]
317      output = array_ops.identity(add)
318
319      with session.Session(config=_get_config(False)) as sess:
320        output_val_ref = sess.run(output, feed_dict={dim: 3})
321
322      with session.Session(config=_get_config()) as sess:
323        metadata = config_pb2.RunMetadata()
324        output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})
325
326      nodes = []
327      num_transposes = 0
328      for node in metadata.cost_graph.node:
329        if _is_transpose(node.name):
330          num_transposes += 1
331        nodes.append(node.name)
332
333      expected_num_transposes = 2
334      self.assertEqual(expected_num_transposes, num_transposes)
335      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
336      self._assert_trans_nchw_to_nhwc('add_2-0-0', nodes)
337      self._assert_map_nhwc_to_nchw('split-0', nodes)
338      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
339
340  @test_util.deprecated_graph_mode_only
341  def testSplitVWithNonConstAxis(self):
342    if test.is_gpu_available(cuda_only=True):
343      random_seed.set_random_seed(0)
344      x = random_ops.truncated_normal([1, 784], seed=0)
345      conv = _two_layer_model(x)
346      dim = array_ops.placeholder(dtype='int32')
347      sizes = constant_op.constant([50, 10, 4], shape=[3])
348      split = gen_array_ops.split_v(
349          value=conv, size_splits=sizes, axis=dim, num_split=3)
350      output = math_ops.reduce_sum(split[0])
351
352      with session.Session(config=_get_config(False)) as sess:
353        output_val_ref = sess.run(output, feed_dict={dim: 3})
354
355      with session.Session(config=_get_config()) as sess:
356        metadata = config_pb2.RunMetadata()
357        output_val = sess.run(output, run_metadata=metadata, feed_dict={dim: 3})
358
359      nodes = []
360      num_transposes = 0
361      for node in metadata.cost_graph.node:
362        if _is_transpose(node.name):
363          num_transposes += 1
364        nodes.append(node.name)
365
366      # Four transposes were initially added in the Expand phase of
367      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
368      expected_num_transposes = 2
369      self.assertEqual(expected_num_transposes, num_transposes)
370      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
371      self._assert_trans_nchw_to_nhwc('SplitV-0-0', nodes)
372      self._assert_map_nhwc_to_nchw('SplitV-2', nodes)
373      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
374
375  @test_util.deprecated_graph_mode_only
376  def testPadWithConstPaddings(self):
377    if test.is_gpu_available(cuda_only=True):
378      random_seed.set_random_seed(0)
379      x = random_ops.truncated_normal([1, 784], seed=0)
380      conv = _two_layer_model(x)
381      paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
382      paddings = constant_op.constant(
383          paddings_val, dtype='int32', name='PaddingsConst')
384      pad = array_ops.pad(conv, paddings)
385      output = array_ops.identity(pad)
386
387      with session.Session(config=_get_config(False)) as sess:
388        output_val_ref = self.evaluate(output)
389
390      with session.Session(config=_get_config()) as sess:
391        metadata = config_pb2.RunMetadata()
392        output_val = sess.run(output, run_metadata=metadata)
393
394      nodes = []
395      num_transposes = 0
396      for node in metadata.cost_graph.node:
397        if _is_transpose(node.name):
398          num_transposes += 1
399        nodes.append(node.name)
400
401      # Four transposes were initially added in the Expand phase of
402      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
403      expected_num_transposes = 2
404      self.assertEqual(expected_num_transposes, num_transposes)
405      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
406      self._assert_trans_nchw_to_nhwc('Pad-0-0', nodes)
407      self.assertIn('Pad-1-LayoutOptimizer', nodes)
408      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
409
410  @test_util.deprecated_graph_mode_only
411  def testReduceSum(self):
412    if test.is_gpu_available(cuda_only=True):
413      random_seed.set_random_seed(0)
414      x = random_ops.truncated_normal([1, 784], seed=0)
415      conv = _two_layer_model(x)
416      reduce_sum = math_ops.reduce_sum(conv)
417      output = array_ops.identity(reduce_sum)
418
419      with session.Session(config=_get_config(False)) as sess:
420        output_val_ref = self.evaluate(output)
421
422      with session.Session(config=_get_config()) as sess:
423        metadata = config_pb2.RunMetadata()
424        output_val = sess.run(output, run_metadata=metadata)
425
426      nodes = []
427      num_transposes = 0
428      for node in metadata.cost_graph.node:
429        if _is_transpose(node.name):
430          num_transposes += 1
431        nodes.append(node.name)
432
433      # Three transposes were initially added in the Expand phase of
434      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
435      expected_num_transposes = 1
436      self.assertEqual(expected_num_transposes, num_transposes)
437      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
438      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
439
440  @test_util.deprecated_graph_mode_only
441  def testCast(self):
442    if test.is_gpu_available(cuda_only=True):
443      random_seed.set_random_seed(0)
444      x = random_ops.truncated_normal([1, 784], seed=0)
445      conv = _two_layer_model(x)
446      cast = math_ops.cast(conv, dtype='bool')
447      output = array_ops.identity(cast)
448
449      with session.Session(config=_get_config(False)) as sess:
450        output_val_ref = self.evaluate(output)
451
452      with session.Session(config=_get_config()) as sess:
453        metadata = config_pb2.RunMetadata()
454        output_val = sess.run(output, run_metadata=metadata)
455
456      nodes = []
457      num_transposes = 0
458      for node in metadata.cost_graph.node:
459        if _is_transpose(node.name):
460          num_transposes += 1
461        nodes.append(node.name)
462
463      # Four transposes were initially added in the Expand phase of
464      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
465      expected_num_transposes = 2
466      self.assertEqual(expected_num_transposes, num_transposes)
467      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
468      self._assert_trans_nchw_to_nhwc('Cast-0-0', nodes)
469      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
470
471  @test_util.deprecated_graph_mode_only
472  def testSqueeze(self):
473    if test.is_gpu_available(cuda_only=True):
474      random_seed.set_random_seed(0)
475      x = random_ops.truncated_normal([1, 784], seed=0)
476      conv = _two_layer_model(x)
477      reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2])
478      squeeze = array_ops.squeeze(reduce_sum)
479      output = array_ops.identity(squeeze)
480
481      with session.Session(config=_get_config(False)) as sess:
482        output_val_ref = self.evaluate(output)
483
484      with session.Session(config=_get_config()) as sess:
485        metadata = config_pb2.RunMetadata()
486        output_val = sess.run(output, run_metadata=metadata)
487
488      nodes = []
489      num_transposes = 0
490      for node in metadata.cost_graph.node:
491        if _is_transpose(node.name):
492          num_transposes += 1
493        nodes.append(node.name)
494
495      # Three transposes were initially added in the Expand phase of
496      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
497      expected_num_transposes = 1
498      self.assertEqual(expected_num_transposes, num_transposes)
499      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
500      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
501
502  @test_util.deprecated_graph_mode_only
503  def testSqueezeAlongHW(self):
504    if test.is_gpu_available(cuda_only=True):
505      random_seed.set_random_seed(0)
506      x = random_ops.truncated_normal([1, 784], seed=0)
507      conv = _two_layer_model(x)
508      reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2], keepdims=True)
509      squeeze = array_ops.squeeze(reduce_sum, axis=[1, 2])
510      output = array_ops.identity(squeeze)
511
512      with session.Session(config=_get_config(False)) as sess:
513        output_val_ref = self.evaluate(output)
514
515      with session.Session(config=_get_config()) as sess:
516        metadata = config_pb2.RunMetadata()
517        output_val = sess.run(output, run_metadata=metadata)
518
519      nodes = []
520      num_transposes = 0
521      for node in metadata.cost_graph.node:
522        if _is_transpose(node.name):
523          num_transposes += 1
524        nodes.append(node.name)
525
526      # Three transposes were initially added in the Expand phase of
527      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
528      expected_num_transposes = 1
529      self.assertEqual(expected_num_transposes, num_transposes)
530      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
531      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
532
533  @test_util.deprecated_graph_mode_only
534  def testSqueezeAlongNHW(self):
535    if test.is_gpu_available(cuda_only=True):
536      random_seed.set_random_seed(0)
537      x = random_ops.truncated_normal([1, 784], seed=0)
538      conv = _two_layer_model(x)
539      reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2], keepdims=True)
540      squeeze = array_ops.squeeze(reduce_sum, axis=[0, 1, 2])
541      output = array_ops.identity(squeeze)
542
543      with session.Session(config=_get_config(False)) as sess:
544        output_val_ref = self.evaluate(output)
545
546      with session.Session(config=_get_config()) as sess:
547        metadata = config_pb2.RunMetadata()
548        output_val = sess.run(output, run_metadata=metadata)
549
550      nodes = []
551      num_transposes = 0
552      for node in metadata.cost_graph.node:
553        if _is_transpose(node.name):
554          num_transposes += 1
555        nodes.append(node.name)
556
557      # Three transposes were initially added in the Expand phase of
558      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
559      expected_num_transposes = 1
560      self.assertEqual(expected_num_transposes, num_transposes)
561      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
562      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
563
564  @test_util.deprecated_graph_mode_only
565  def testReduceSumAlongHWC(self):
566    if test.is_gpu_available(cuda_only=True):
567      random_seed.set_random_seed(0)
568      x = random_ops.truncated_normal([1, 784], seed=0)
569      conv = _two_layer_model(x)
570      reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2, 3])
571      output = array_ops.identity(reduce_sum)
572
573      with session.Session(config=_get_config(False)) as sess:
574        output_val_ref = self.evaluate(output)
575
576      with session.Session(config=_get_config()) as sess:
577        metadata = config_pb2.RunMetadata()
578        output_val = sess.run(output, run_metadata=metadata)
579
580      nodes = []
581      num_transposes = 0
582      for node in metadata.cost_graph.node:
583        if _is_transpose(node.name):
584          num_transposes += 1
585        nodes.append(node.name)
586
587      # Three transposes were initially added in the Expand phase of
588      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
589      expected_num_transposes = 1
590      self.assertEqual(expected_num_transposes, num_transposes)
591      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
592      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
593
594  @test_util.deprecated_graph_mode_only
595  def testReduceSumAlongNHW(self):
596    if test.is_gpu_available(cuda_only=True):
597      random_seed.set_random_seed(0)
598      x = random_ops.truncated_normal([1, 784], seed=0)
599      conv = _two_layer_model(x)
600      reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2])
601      output = array_ops.identity(reduce_sum)
602
603      with session.Session(config=_get_config(False)) as sess:
604        output_val_ref = self.evaluate(output)
605
606      with session.Session(config=_get_config()) as sess:
607        metadata = config_pb2.RunMetadata()
608        output_val = sess.run(output, run_metadata=metadata)
609
610      nodes = []
611      num_transposes = 0
612      for node in metadata.cost_graph.node:
613        if _is_transpose(node.name):
614          num_transposes += 1
615        nodes.append(node.name)
616
617      # Three transposes were initially added in the Expand phase of
618      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
619      expected_num_transposes = 1
620      self.assertEqual(expected_num_transposes, num_transposes)
621      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
622      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
623
624  @test_util.deprecated_graph_mode_only
625  def testReduceSumAlongC(self):
626    if test.is_gpu_available(cuda_only=True):
627      random_seed.set_random_seed(0)
628      x = random_ops.truncated_normal([1, 784], seed=0)
629      conv = _two_layer_model(x)
630      reduce_sum = math_ops.reduce_sum(conv, axis=[3])
631      output = array_ops.identity(reduce_sum)
632
633      with session.Session(config=_get_config(False)) as sess:
634        output_val_ref = self.evaluate(output)
635
636      with session.Session(config=_get_config()) as sess:
637        metadata = config_pb2.RunMetadata()
638        output_val = sess.run(output, run_metadata=metadata)
639
640      nodes = []
641      num_transposes = 0
642      for node in metadata.cost_graph.node:
643        if _is_transpose(node.name):
644          num_transposes += 1
645        nodes.append(node.name)
646
647      # Three transposes were initially added in the Expand phase of
648      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
649      expected_num_transposes = 1
650      self.assertEqual(expected_num_transposes, num_transposes)
651      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
652      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
653
654  @test_util.deprecated_graph_mode_only
655  def testReduceSumAlongCKeepDims(self):
656    if test.is_gpu_available(cuda_only=True):
657      random_seed.set_random_seed(0)
658      x = random_ops.truncated_normal([1, 784], seed=0)
659      conv = _two_layer_model(x)
660      reduce_sum = math_ops.reduce_sum(conv, axis=[3], keepdims=True)
661      output = array_ops.identity(reduce_sum)
662
663      with session.Session(config=_get_config(False)) as sess:
664        output_val_ref = self.evaluate(output)
665
666      with session.Session(config=_get_config()) as sess:
667        metadata = config_pb2.RunMetadata()
668        output_val = sess.run(output, run_metadata=metadata)
669
670      nodes = []
671      num_transposes = 0
672      for node in metadata.cost_graph.node:
673        if _is_transpose(node.name):
674          num_transposes += 1
675        nodes.append(node.name)
676
677      # Four transposes were initially added in the Expand phase of
678      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
679      expected_num_transposes = 2
680      self.assertEqual(expected_num_transposes, num_transposes)
681      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
682      self._assert_trans_nchw_to_nhwc('Sum-0-0', nodes)
683      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
684
685  @test_util.deprecated_graph_mode_only
686  def testReduceSumAlongHKeepDims(self):
687    if test.is_gpu_available(cuda_only=True):
688      random_seed.set_random_seed(0)
689      x = random_ops.truncated_normal([1, 784], seed=0)
690      conv = _two_layer_model(x)
691      reduce_sum = math_ops.reduce_sum(conv, axis=[2], keepdims=True)
692      output = array_ops.identity(reduce_sum)
693
694      with session.Session(config=_get_config(False)) as sess:
695        output_val_ref = self.evaluate(output)
696
697      with session.Session(config=_get_config()) as sess:
698        metadata = config_pb2.RunMetadata()
699        output_val = sess.run(output, run_metadata=metadata)
700
701      nodes = []
702      num_transposes = 0
703      for node in metadata.cost_graph.node:
704        if _is_transpose(node.name):
705          num_transposes += 1
706        nodes.append(node.name)
707
708      # Four transposes were initially added in the Expand phase of
709      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
710      expected_num_transposes = 2
711      self.assertEqual(expected_num_transposes, num_transposes)
712      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
713      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
714
715  @test_util.deprecated_graph_mode_only
716  def testReduceSumAlongWCKeepDims(self):
717    if test.is_gpu_available(cuda_only=True):
718      random_seed.set_random_seed(0)
719      x = random_ops.truncated_normal([1, 784], seed=0)
720      conv = _two_layer_model(x)
721      reduce_sum = math_ops.reduce_sum(conv, axis=[2, 3], keepdims=True)
722      output = array_ops.identity(reduce_sum)
723
724      with session.Session(config=_get_config(False)) as sess:
725        output_val_ref = self.evaluate(output)
726
727      with session.Session(config=_get_config()) as sess:
728        metadata = config_pb2.RunMetadata()
729        output_val = sess.run(output, run_metadata=metadata)
730
731      nodes = []
732      num_transposes = 0
733      for node in metadata.cost_graph.node:
734        if _is_transpose(node.name):
735          num_transposes += 1
736        nodes.append(node.name)
737
738      # Four transposes were initially added in the Expand phase of
739      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
740      expected_num_transposes = 2
741      self.assertEqual(expected_num_transposes, num_transposes)
742      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
743      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
744
745  @test_util.deprecated_graph_mode_only
746  def testConcatWithControlDependency(self):
747    if test.is_gpu_available(cuda_only=True):
748      random_seed.set_random_seed(0)
749      x = random_ops.truncated_normal([1, 784], seed=0)
750      conv = _two_layer_model(x)
751      axis = constant_op.constant(3)
752      var = variables.Variable(3)
753      assign = state_ops.assign(var, 6)
754      with ops.control_dependencies([assign]):
755        concat = array_ops.concat([conv, conv], axis)
756      output = array_ops.identity(concat)
757
758      with session.Session(config=_get_config(False)) as sess:
759        output_val_ref = self.evaluate(output)
760
761      with session.Session(config=_get_config()) as sess:
762        metadata = config_pb2.RunMetadata()
763        output_val = sess.run(output, run_metadata=metadata)
764
765      nodes = []
766      num_transposes = 0
767      for node in metadata.cost_graph.node:
768        if _is_transpose(node.name):
769          num_transposes += 1
770        nodes.append(node.name)
771
772      # Four transposes were initially added in the Expand phase of
773      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
774      expected_num_transposes = 2
775      self.assertEqual(expected_num_transposes, num_transposes)
776      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
777      self._assert_trans_nchw_to_nhwc('concat-0-0', nodes)
778      self.assertIn('concat-2-LayoutOptimizer', nodes)
779      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
780
781  @test_util.deprecated_graph_mode_only
782  def testConcatWithControlDependencyFor5DTensor(self):
783    if not test.is_gpu_available(cuda_only=True):
784      self.skipTest('GPU required')
785    random_seed.set_random_seed(0)
786    x = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0)
787    w = random_ops.truncated_normal([2, 2, 2, 1, 2], seed=0)
788    strides = [1, 1, 1, 1, 1]
789    y = gen_nn_ops.conv3d(x, w, strides, 'SAME')
790    axis = constant_op.constant(4)
791    var = variables.Variable(3)
792    assign = state_ops.assign(var, 6)
793    with ops.control_dependencies([assign]):
794      concat = array_ops.concat([y, y], axis)
795    output = array_ops.identity(concat)
796
797    with session.Session(config=_get_config(False)) as sess:
798      output_val_ref = self.evaluate(output)
799
800    with session.Session(config=_get_config()) as sess:
801      metadata = config_pb2.RunMetadata()
802      output_val = sess.run(output, run_metadata=metadata)
803
804    nodes = []
805    num_transposes = 0
806    for node in metadata.cost_graph.node:
807      if _is_transpose(node.name):
808        num_transposes += 1
809      nodes.append(node.name)
810
811    # Four transposes were initially added in the Expand phase of
812    # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
813    expected_num_transposes = 2
814    self.assertEqual(expected_num_transposes, num_transposes)
815    self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes)
816    self._assert_trans_ncdhw_to_ndhwc('concat-0-0', nodes)
817    self._assert_map_ndhwc_to_ncdhw('concat-2', nodes)
818    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
819
820  @test_util.deprecated_graph_mode_only
821  def testFill(self):
822    if test.is_gpu_available(cuda_only=True):
823      random_seed.set_random_seed(0)
824      x = array_ops.placeholder(dtype='float32')
825      conv = _two_layer_model(x)
826      shape = array_ops.shape(conv)
827      scalar = array_ops.constant(5.7)
828      fill = array_ops.fill(shape, scalar)
829      output = array_ops.identity(fill)
830
831      x_val = [3.4] * 784
832      with session.Session(config=_get_config(False)) as sess:
833        output_val_ref = sess.run(output, feed_dict={x: x_val})
834
835      with session.Session(config=_get_config()) as sess:
836        metadata = config_pb2.RunMetadata()
837        output_val = sess.run(
838            output, run_metadata=metadata, feed_dict={
839                x: x_val
840            })
841
842      nodes = []
843      num_transposes = 0
844      num_vec_permute = 0
845      for node in metadata.cost_graph.node:
846        if _is_transpose(node.name):
847          num_transposes += 1
848        if _is_permute(node.name):
849          num_vec_permute += 1
850        nodes.append(node.name)
851
852      # Four transposes were initially added in the Expand phase of
853      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
854      expected_num_transposes = 2
855      self.assertEqual(expected_num_transposes, num_transposes)
856      # Two vector permute nodes were initially added in the Expand phase of
857      # LayoutOptimizer; they cancelled out each other in the Collapse phase.
858      expected_vec_permute = 0
859      self.assertEqual(expected_vec_permute, num_vec_permute)
860      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
861      self._assert_trans_nchw_to_nhwc('Fill-0-0', nodes)
862      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
863
864  @test_util.deprecated_graph_mode_only
865  def testTile(self):
866    if test.is_gpu_available(cuda_only=True):
867      random_seed.set_random_seed(0)
868      x = random_ops.truncated_normal([1, 784], seed=0)
869      conv = _two_layer_model(x)
870      multiple = array_ops.placeholder(dtype='int32')
871      tile = array_ops.tile(conv, multiple)
872      output = array_ops.identity(tile)
873
874      multiple_val = [2, 3, 4, 1]
875      with session.Session(config=_get_config(False)) as sess:
876        output_val_ref = sess.run(output, feed_dict={multiple: multiple_val})
877
878      with session.Session(config=_get_config()) as sess:
879        metadata = config_pb2.RunMetadata()
880        output_val = sess.run(
881            output, run_metadata=metadata, feed_dict={
882                multiple: multiple_val
883            })
884
885      nodes = []
886      num_transposes = 0
887      for node in metadata.cost_graph.node:
888        if _is_transpose(node.name):
889          num_transposes += 1
890        nodes.append(node.name)
891
892      # Four transposes were initially added in the Expand phase of
893      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
894      expected_num_transposes = 2
895      self.assertEqual(expected_num_transposes, num_transposes)
896      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
897      self._assert_trans_nchw_to_nhwc('Tile-0-0', nodes)
898      self._assert_vec_nhwc_to_nchw('Tile-1', nodes)
899      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
900
901  @test_util.deprecated_graph_mode_only
902  def testReverseWithConstDims(self):
903    if test.is_gpu_available(cuda_only=True):
904      random_seed.set_random_seed(0)
905      x = random_ops.truncated_normal([1, 784], seed=0)
906      conv = _two_layer_model(x)
907      dims = constant_op.constant([3, 1], name='DimsConst')
908      reverse = array_ops.reverse(conv, dims)
909      output = array_ops.identity(reverse)
910
911      with session.Session(config=_get_config(False)) as sess:
912        output_val_ref = self.evaluate(output)
913
914      with session.Session(config=_get_config()) as sess:
915        metadata = config_pb2.RunMetadata()
916        output_val = sess.run(output, run_metadata=metadata)
917
918      nodes = []
919      num_transposes = 0
920      for node in metadata.cost_graph.node:
921        if _is_transpose(node.name):
922          num_transposes += 1
923        nodes.append(node.name)
924
925      # Four transposes were initially added in the Expand phase of
926      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
927      expected_num_transposes = 2
928      self.assertEqual(expected_num_transposes, num_transposes)
929      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
930      self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
931      self.assertIn('ReverseV2-1-LayoutOptimizer', nodes)
932      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
933
934  @test_util.deprecated_graph_mode_only
935  def testReverseWithNonConstDims(self):
936    if test.is_gpu_available(cuda_only=True):
937      random_seed.set_random_seed(0)
938      x = random_ops.truncated_normal([1, 784], seed=0)
939      conv = _two_layer_model(x)
940      dims = array_ops.placeholder(dtype='int32')
941      reverse = array_ops.reverse(conv, dims)
942      output = array_ops.identity(reverse)
943
944      dims_val = [2, 3]
945      with session.Session(config=_get_config(False)) as sess:
946        output_val_ref = sess.run(output, feed_dict={dims: dims_val})
947
948      with session.Session(config=_get_config()) as sess:
949        metadata = config_pb2.RunMetadata()
950        output_val = sess.run(
951            output, run_metadata=metadata, feed_dict={
952                dims: dims_val
953            })
954
955      nodes = []
956      num_transposes = 0
957      for node in metadata.cost_graph.node:
958        if _is_transpose(node.name):
959          num_transposes += 1
960        nodes.append(node.name)
961
962      # Four transposes were initially added in the Expand phase of
963      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
964      expected_num_transposes = 2
965      self.assertEqual(expected_num_transposes, num_transposes)
966      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
967      self._assert_trans_nchw_to_nhwc('ReverseV2-0-0', nodes)
968      self._assert_map_nhwc_to_nchw('ReverseV2-1', nodes)
969      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
970
971  @test_util.deprecated_graph_mode_only
972  def testSelectOp(self):
973    if test.is_gpu_available(cuda_only=True):
974      random_seed.set_random_seed(0)
975      x = random_ops.truncated_normal([1, 784], seed=0)
976      conv = _two_layer_model(x)
977      add = math_ops.add(conv, conv)
978      mean = math_ops.reduce_mean(conv)
979      condition = math_ops.less(conv, mean)
980      select = gen_math_ops.select(condition, conv, add)
981      output = array_ops.identity(select)
982
983      with session.Session(config=_get_config(False)) as sess:
984        output_val_ref = self.evaluate(output)
985
986      with session.Session(config=_get_config()) as sess:
987        metadata = config_pb2.RunMetadata()
988        output_val = sess.run(output, run_metadata=metadata)
989
990      nodes = []
991      num_transposes = 0
992      for node in metadata.cost_graph.node:
993        if _is_transpose(node.name):
994          num_transposes += 1
995        nodes.append(node.name)
996
997      expected_num_transposes = 2
998      self.assertEqual(expected_num_transposes, num_transposes)
999      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1000      self._assert_trans_nchw_to_nhwc('Select-0-0', nodes)
1001      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1002
1003  @test_util.deprecated_graph_mode_only
1004  def testSelectOpConditionUnknownShape(self):
1005    if test.is_gpu_available(cuda_only=True):
1006      random_seed.set_random_seed(0)
1007      x = random_ops.truncated_normal([1, 784], seed=0)
1008      conv = _two_layer_model(x)
1009      add = math_ops.add(conv, conv)
1010      condition = array_ops.placeholder(dtype='bool')
1011      select = gen_math_ops.select(condition, conv, add)
1012      output = array_ops.identity(select)
1013
1014      condition_val = np.zeros((1, 7, 7, 64))
1015      with session.Session(config=_get_config(False)) as sess:
1016        output_val_ref = sess.run(output, feed_dict={condition: condition_val})
1017
1018      with session.Session(config=_get_config()) as sess:
1019        metadata = config_pb2.RunMetadata()
1020        output_val = sess.run(
1021            output, run_metadata=metadata, feed_dict={condition: condition_val})
1022
1023      nodes = []
1024      num_transposes = 0
1025      for node in metadata.cost_graph.node:
1026        if _is_transpose(node.name):
1027          num_transposes += 1
1028        nodes.append(node.name)
1029
1030      expected_num_transposes = 3
1031      self.assertEqual(expected_num_transposes, num_transposes)
1032      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1033      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1034
1035  @test_util.deprecated_graph_mode_only
1036  def testSelectOpScalarCondition(self):
1037    if test.is_gpu_available(cuda_only=True):
1038      random_seed.set_random_seed(0)
1039      x = random_ops.truncated_normal([1, 784], seed=0)
1040      conv = _two_layer_model(x)
1041      add = math_ops.add(conv, conv)
1042      condition = constant_op.constant(True)
1043      select = gen_math_ops.select(condition, conv, add)
1044      output = array_ops.identity(select)
1045
1046      with session.Session(config=_get_config(False)) as sess:
1047        output_val_ref = self.evaluate(output)
1048
1049      with session.Session(config=_get_config()) as sess:
1050        metadata = config_pb2.RunMetadata()
1051        output_val = sess.run(output, run_metadata=metadata)
1052
1053      nodes = []
1054      num_transposes = 0
1055      for node in metadata.cost_graph.node:
1056        if _is_transpose(node.name):
1057          num_transposes += 1
1058        nodes.append(node.name)
1059
1060      expected_num_transposes = 2
1061      self.assertEqual(expected_num_transposes, num_transposes)
1062      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1063      self._assert_trans_nchw_to_nhwc('Select-0-0', nodes)
1064      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1065
1066  @test_util.deprecated_graph_mode_only
1067  def testPadWithNonConstPaddings(self):
1068    if test.is_gpu_available(cuda_only=True):
1069      random_seed.set_random_seed(0)
1070      x = random_ops.truncated_normal([1, 784], seed=0)
1071      conv = _two_layer_model(x)
1072      paddings = array_ops.placeholder(dtype='int32')
1073      pad = array_ops.pad(conv, paddings)
1074      output = array_ops.identity(pad)
1075
1076      paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]]
1077      with session.Session(config=_get_config(False)) as sess:
1078        output_val_ref = sess.run(output, feed_dict={paddings: paddings_val})
1079
1080      with session.Session(config=_get_config()) as sess:
1081        metadata = config_pb2.RunMetadata()
1082        output_val = sess.run(
1083            output, run_metadata=metadata, feed_dict={
1084                paddings: paddings_val
1085            })
1086
1087      nodes = []
1088      num_transposes = 0
1089      for node in metadata.cost_graph.node:
1090        if _is_transpose(node.name):
1091          num_transposes += 1
1092        nodes.append(node.name)
1093
1094      # Four transposes were initially added in the Expand phase of
1095      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1096      expected_num_transposes = 2
1097      self.assertEqual(expected_num_transposes, num_transposes)
1098      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1099      self._assert_trans_nchw_to_nhwc('Pad-0-0', nodes)
1100      self._assert_vec_nhwc_to_nchw('Pad-1', nodes)
1101      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1102
1103  @test_util.deprecated_graph_mode_only
1104  def testMaxPoolV2(self):
1105    if test.is_gpu_available(cuda_only=True):
1106      random_seed.set_random_seed(0)
1107      x = random_ops.truncated_normal([1, 784], seed=0)
1108      conv = _two_layer_model(x)
1109      ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
1110      strides = array_ops.placeholder(dtype='int32', shape=[4])
1111      max_pool = gen_nn_ops.max_pool_v2(conv, ksize, strides, 'VALID')
1112      output = array_ops.identity(max_pool)
1113
1114      strides_val = [1, 3, 2, 1]
1115      with session.Session(config=_get_config(False)) as sess:
1116        output_val_ref = sess.run(output, feed_dict={strides: strides_val})
1117
1118      with session.Session(config=_get_config()) as sess:
1119        metadata = config_pb2.RunMetadata()
1120        output_val = sess.run(
1121            output, run_metadata=metadata, feed_dict={
1122                strides: strides_val
1123            })
1124
1125      nodes = []
1126      num_transposes = 0
1127      for node in metadata.cost_graph.node:
1128        if _is_transpose(node.name):
1129          num_transposes += 1
1130        nodes.append(node.name)
1131
1132      expected_num_transposes = 2
1133      self.assertEqual(expected_num_transposes, num_transposes)
1134      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1135      self._assert_trans_nchw_to_nhwc('MaxPoolV2-0-0', nodes)
1136      self._assert_vec_nhwc_to_nchw('MaxPoolV2-2', nodes)
1137      self.assertIn('MaxPoolV2-1-LayoutOptimizer', nodes)
1138      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1139
1140  @test_util.deprecated_graph_mode_only
1141  def testMaxPoolGradV2(self):
1142    if test.is_gpu_available(cuda_only=True):
1143      random_seed.set_random_seed(0)
1144      x = random_ops.truncated_normal([1, 784], seed=0)
1145      conv = _two_layer_model(x)
1146      ksize = constant_op.constant([1, 2, 3, 1], shape=[4])
1147      strides = array_ops.placeholder(dtype='int32', shape=[4])
1148      max_pool_grad = gen_nn_ops.max_pool_grad_v2(conv, conv, conv, ksize,
1149                                                  strides, 'VALID')
1150      output = array_ops.identity(max_pool_grad)
1151
1152      strides_val = [1, 3, 2, 1]
1153      with session.Session(config=_get_config(False)) as sess:
1154        output_val_ref = sess.run(output, feed_dict={strides: strides_val})
1155
1156      with session.Session(config=_get_config()) as sess:
1157        metadata = config_pb2.RunMetadata()
1158        output_val = sess.run(
1159            output, run_metadata=metadata, feed_dict={
1160                strides: strides_val
1161            })
1162
1163      nodes = []
1164      num_transposes = 0
1165      for node in metadata.cost_graph.node:
1166        if _is_transpose(node.name):
1167          num_transposes += 1
1168        nodes.append(node.name)
1169
1170      expected_num_transposes = 2
1171      self.assertEqual(expected_num_transposes, num_transposes)
1172      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1173      self._assert_trans_nchw_to_nhwc('MaxPoolGradV2-0-0', nodes)
1174      self._assert_vec_nhwc_to_nchw('MaxPoolGradV2-4', nodes)
1175      self.assertIn('MaxPoolGradV2-3-LayoutOptimizer', nodes)
1176      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1177
1178  @test_util.deprecated_graph_mode_only
1179  def testLeakyRelu(self):
1180    if test.is_gpu_available(cuda_only=True):
1181      random_seed.set_random_seed(0)
1182      x = random_ops.truncated_normal([4, 14, 14, 1], seed=0)
1183      w = random_ops.truncated_normal([2, 2, 1, 2], seed=0)
1184      y = nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
1185      y = nn.leaky_relu(y, alpha=0.2)
1186      output = array_ops.identity(y)
1187
1188      with session.Session(config=_get_config(False)) as sess:
1189        output_val_ref = sess.run(output)
1190
1191      with session.Session(config=_get_config()) as sess:
1192        metadata = config_pb2.RunMetadata()
1193        output_val = sess.run(output, run_metadata=metadata)
1194
1195      nodes = []
1196      num_transposes = 0
1197      for node in metadata.cost_graph.node:
1198        if _is_transpose(node.name):
1199          num_transposes += 1
1200        nodes.append(node.name)
1201
1202      expected_num_transposes = 2
1203      self.assertEqual(expected_num_transposes, num_transposes)
1204      self._assert_trans_nchw_to_nhwc('LeakyRelu-0-0', nodes)
1205      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1206
1207  @test_util.deprecated_graph_mode_only
1208  def testLeakyReluGrad(self):
1209    if test.is_gpu_available(cuda_only=True):
1210      random_seed.set_random_seed(0)
1211      x = random_ops.truncated_normal([4, 14, 14, 1], seed=0)
1212      w = random_ops.truncated_normal([2, 2, 1, 1], seed=0)
1213      y = nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
1214      y = gen_nn_ops.leaky_relu_grad(y, x, alpha=0.2)
1215      output = array_ops.identity(y)
1216
1217      with session.Session(config=_get_config(False)) as sess:
1218        output_val_ref = sess.run(output)
1219
1220      with session.Session(config=_get_config()) as sess:
1221        metadata = config_pb2.RunMetadata()
1222        output_val = sess.run(output, run_metadata=metadata)
1223
1224      nodes = []
1225      num_transposes = 0
1226      for node in metadata.cost_graph.node:
1227        if _is_transpose(node.name):
1228          num_transposes += 1
1229        nodes.append(node.name)
1230
1231      expected_num_transposes = 3
1232      self.assertEqual(expected_num_transposes, num_transposes)
1233      self._assert_trans_nhwc_to_nchw('LeakyReluGrad-1', nodes)
1234      self._assert_trans_nchw_to_nhwc('LeakyReluGrad-0-0', nodes)
1235      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1236
1237  @test_util.deprecated_graph_mode_only
1238  def testLeakyReluGradFor5DTensors(self):
1239    if test.is_gpu_available(cuda_only=True):
1240      random_seed.set_random_seed(0)
1241      x = random_ops.truncated_normal([1, 4, 2, 3, 3], seed=0)
1242      w = random_ops.truncated_normal([2, 2, 2, 3, 3], seed=0)
1243      y = gen_nn_ops.conv3d(x, w, [1, 1, 1, 1, 1], 'SAME')
1244      y = gen_nn_ops.leaky_relu_grad(y, x, alpha=0.2)
1245      output = array_ops.identity(y)
1246
1247      with session.Session(config=_get_config(False)) as sess:
1248        output_val_ref = sess.run(output)
1249
1250      with session.Session(config=_get_config()) as sess:
1251        metadata = config_pb2.RunMetadata()
1252        output_val = sess.run(output, run_metadata=metadata)
1253
1254      nodes = []
1255      num_transposes = 0
1256      for node in metadata.cost_graph.node:
1257        if _is_transpose(node.name):
1258          num_transposes += 1
1259        nodes.append(node.name)
1260
1261      expected_num_transposes = 3
1262      self.assertEqual(expected_num_transposes, num_transposes)
1263      self._assert_trans_ndhwc_to_ncdhw('LeakyReluGrad-1', nodes)
1264      self._assert_trans_ncdhw_to_ndhwc('LeakyReluGrad-0-0', nodes)
1265      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1266
1267  @test_util.deprecated_graph_mode_only
1268  def testReduceOpsFor5DTensors(self):
1269    if test.is_gpu_available(cuda_only=True):
1270      random_seed.set_random_seed(0)
1271      x = random_ops.truncated_normal([1, 4, 2, 3, 3], seed=0)
1272      w = random_ops.truncated_normal([2, 2, 2, 3, 3], seed=0)
1273      conv3d = gen_nn_ops.conv3d(x, w, [1, 1, 1, 1, 1], 'SAME')
1274      y = math_ops.reduce_mean(conv3d, [0, 1, 2, 3], keepdims=True)
1275      output = array_ops.identity(y)
1276
1277      with session.Session(config=_get_config(False)) as sess:
1278        output_val_ref = sess.run(output)
1279
1280      with session.Session(config=_get_config()) as sess:
1281        metadata = config_pb2.RunMetadata()
1282        output_val = sess.run(output, run_metadata=metadata)
1283
1284      nodes = []
1285      num_transposes = 0
1286      for node in metadata.cost_graph.node:
1287        if _is_transpose(node.name):
1288          num_transposes += 1
1289        nodes.append(node.name)
1290
1291      # The reduce op Mean needs to dim map the input reduce index to NCDHW.
1292      # Then, the output needs to be tranposed back to NDHWC.
1293      expected_num_transposes = 2
1294      self.assertEqual(expected_num_transposes, num_transposes)
1295      self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes)
1296      self._assert_map_ndhwc_to_ncdhw('Mean-1', nodes)
1297      self._assert_trans_ncdhw_to_ndhwc('Mean-0-0', nodes)
1298      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1299
1300  @test_util.deprecated_graph_mode_only
1301  def testBinaryOpsFor5DTensors(self):
1302    if test.is_gpu_available(cuda_only=True):
1303      random_seed.set_random_seed(0)
1304      x = random_ops.truncated_normal([1, 4, 2, 3, 3], seed=0)
1305      w = random_ops.truncated_normal([2, 2, 2, 3, 3], seed=0)
1306      mean = random_ops.truncated_normal([1, 1, 1, 1, 3], seed=0)
1307      variance = random_ops.truncated_normal([1, 1, 1, 1, 3], seed=0)
1308      gamma = random_ops.truncated_normal([1, 1, 1, 1, 3], seed=0)
1309      beta = random_ops.truncated_normal([1, 1, 1, 1, 3], seed=0)
1310      conv3d = gen_nn_ops.conv3d(x, w, [1, 1, 1, 1, 1], 'SAME')
1311      y = nn.batch_normalization(
1312          conv3d,
1313          mean=mean,
1314          variance=variance,
1315          scale=gamma,
1316          offset=beta,
1317          variance_epsilon=0.001)
1318      output = array_ops.identity(y)
1319
1320      with session.Session(config=_get_config(False)) as sess:
1321        output_val_ref = sess.run(output)
1322
1323      with session.Session(config=_get_config()) as sess:
1324        metadata = config_pb2.RunMetadata()
1325        output_val = sess.run(output, run_metadata=metadata)
1326
1327      nodes = []
1328      num_transposes = 0
1329      for node in metadata.cost_graph.node:
1330        if _is_transpose(node.name):
1331          num_transposes += 1
1332        nodes.append(node.name)
1333
1334      # The binary ops mul_1 and add_1 in batch norm need to transpose one of
1335      # the two inputs to NCDHW. The other input has already been tranposed via
1336      # Conv3D.
1337      expected_num_transposes = 4
1338      self.assertEqual(expected_num_transposes, num_transposes)
1339      self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes)
1340      self._assert_trans_ndhwc_to_ncdhw('batchnorm/mul_1-1', nodes)
1341      self._assert_trans_ndhwc_to_ncdhw('batchnorm/add_1-1', nodes)
1342      self._assert_trans_ncdhw_to_ndhwc('batchnorm/add_1-0-0', nodes)
1343
1344  @test_util.deprecated_graph_mode_only
1345  def testBatchNorm3D(self):
1346    if test.is_gpu_available(cuda_only=True):
1347      random_seed.set_random_seed(0)
1348      x_3d = random_ops.truncated_normal([1, 4, 2, 3, 3], seed=0)
1349      filters = random_ops.truncated_normal([2, 2, 2, 3, 3], seed=0)
1350      strides_val = [1, 1, 1, 1, 1]
1351      scale = constant_op.constant(0.1, shape=[3])
1352      offset = constant_op.constant(0.3, shape=[3])
1353      conv3d = gen_nn_ops.conv3d(x_3d, filters, strides_val, 'SAME')
1354      y, _, _ = nn.fused_batch_norm(conv3d, scale, offset, data_format='NDHWC')
1355      output = array_ops.identity(y)
1356
1357      with session.Session(config=_get_config(False)) as sess:
1358        output_val_ref = sess.run(output)
1359
1360      with session.Session(config=_get_config()) as sess:
1361        metadata = config_pb2.RunMetadata()
1362        output_val = sess.run(output, run_metadata=metadata)
1363
1364      nodes = []
1365      num_transposes = 0
1366      for node in metadata.cost_graph.node:
1367        if _is_transpose(node.name):
1368          num_transposes += 1
1369        nodes.append(node.name)
1370
1371      expected_num_transposes = 2
1372      self.assertEqual(expected_num_transposes, num_transposes)
1373      self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes)
1374      self._assert_trans_ncdhw_to_ndhwc('FusedBatchNormV3-0-0', nodes)
1375      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1376
1377  @test_util.deprecated_graph_mode_only
1378  def testBatchNormGrad3D(self):
1379    if test.is_gpu_available(cuda_only=True):
1380      random_seed.set_random_seed(0)
1381      x_3d = random_ops.truncated_normal([1, 4, 2, 3, 3], seed=0)
1382      filters = random_ops.truncated_normal([2, 2, 2, 3, 3], seed=0)
1383      strides_val = [1, 1, 1, 1, 1]
1384      scale = constant_op.constant(0.1, shape=[3])
1385      offset = constant_op.constant(0.3, shape=[3])
1386      mean = constant_op.constant(0.1, shape=[3])
1387      variance = constant_op.constant(0.3, shape=[3])
1388      conv3d = gen_nn_ops.conv3d(x_3d, filters, strides_val, 'SAME')
1389      y, running_mean, running_var, r0, r1, r2 = gen_nn_ops.fused_batch_norm_v3(
1390          conv3d,
1391          scale,
1392          offset,
1393          mean,
1394          variance,
1395          epsilon=1.001e-5,
1396          exponential_avg_factor=1.0,
1397          data_format='NDHWC',
1398          is_training=True,
1399          name='batch_norm')
1400      dx, dscale, doffset, _, _ = gen_nn_ops.fused_batch_norm_grad_v3(
1401          y,
1402          x_3d,
1403          scale,
1404          r0,
1405          r1,
1406          r2,
1407          epsilon=1.001e-5,
1408          data_format='NDHWC',
1409          is_training=True)
1410      output = array_ops.identity(dx)
1411
1412      with session.Session(config=_get_config(False)) as sess:
1413        output_val_ref = sess.run(output)
1414
1415      with session.Session(config=_get_config()) as sess:
1416        metadata = config_pb2.RunMetadata()
1417        output_val = sess.run(output, run_metadata=metadata)
1418
1419      nodes = []
1420      num_transposes = 0
1421      for node in metadata.cost_graph.node:
1422        if _is_transpose(node.name):
1423          num_transposes += 1
1424        nodes.append(node.name)
1425
1426      expected_num_transposes = 3
1427      self.assertEqual(expected_num_transposes, num_transposes)
1428      self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes)
1429      self._assert_trans_ndhwc_to_ncdhw('FusedBatchNormGradV3-1', nodes)
1430      self._assert_trans_ncdhw_to_ndhwc('FusedBatchNormGradV3-0-0', nodes)
1431      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1432
1433  @test_util.deprecated_graph_mode_only
1434  def testConv3D(self):
1435    if not test.is_gpu_available(cuda_only=True):
1436      self.skipTest('GPU required')
1437    random_seed.set_random_seed(0)
1438    x = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0)
1439    w = random_ops.truncated_normal([2, 2, 2, 1, 2], seed=0)
1440    strides = [1, 1, 1, 1, 1]
1441    y = gen_nn_ops.conv3d(x, w, strides, 'SAME')
1442    output = array_ops.identity(y)
1443
1444    with session.Session(config=_get_config(False)) as sess:
1445      output_val_ref = sess.run(output)
1446
1447    with session.Session(config=_get_config()) as sess:
1448      metadata = config_pb2.RunMetadata()
1449      output_val = sess.run(output, run_metadata=metadata)
1450
1451    nodes = []
1452    num_transposes = 0
1453    for node in metadata.cost_graph.node:
1454      if _is_transpose(node.name):
1455        num_transposes += 1
1456      nodes.append(node.name)
1457
1458    expected_num_transposes = 2
1459    self.assertEqual(expected_num_transposes, num_transposes)
1460    self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes)
1461    self._assert_trans_ncdhw_to_ndhwc('Conv3D-0-0', nodes)
1462    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1463
1464  @test_util.deprecated_graph_mode_only
1465  def testConv3DBackpropInput(self):
1466    if not test.is_gpu_available(cuda_only=True):
1467      self.skipTest('GPU required')
1468    random_seed.set_random_seed(0)
1469    dy = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0)
1470    w = random_ops.truncated_normal([2, 2, 2, 1, 1], seed=0)
1471    strides = [1, 1, 1, 1, 1]
1472    x_shape = array_ops.shape(dy)
1473    dx = gen_nn_ops.conv3d_backprop_input_v2(x_shape, w, dy, strides, 'SAME')
1474    output = array_ops.identity(dx)
1475
1476    with session.Session(config=_get_config(False)) as sess:
1477      output_val_ref = sess.run(output)
1478
1479    with session.Session(config=_get_config()) as sess:
1480      metadata = config_pb2.RunMetadata()
1481      output_val = sess.run(output, run_metadata=metadata)
1482
1483    nodes = []
1484    num_transposes = 0
1485    for node in metadata.cost_graph.node:
1486      if _is_transpose(node.name):
1487        num_transposes += 1
1488      nodes.append(node.name)
1489
1490    expected_num_transposes = 2
1491    self.assertEqual(expected_num_transposes, num_transposes)
1492    self._assert_vec_ndhwc_to_ncdhw('Conv3DBackpropInputV2-0', nodes)
1493    self._assert_trans_ndhwc_to_ncdhw('Conv3DBackpropInputV2-2', nodes)
1494    self._assert_trans_ncdhw_to_ndhwc('Conv3DBackpropInputV2-0-0', nodes)
1495    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1496
1497  @test_util.deprecated_graph_mode_only
1498  def testConv3DBackpropFilter(self):
1499    if not test.is_gpu_available(cuda_only=True):
1500      self.skipTest('GPU required')
1501    random_seed.set_random_seed(0)
1502    x = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0)
1503    dy = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0)
1504    strides = [1, 1, 1, 1, 1]
1505    w_shape = constant_op.constant([2, 2, 2, 1, 1], shape=[5])
1506    dw = gen_nn_ops.conv3d_backprop_filter_v2(x, w_shape, dy, strides, 'SAME')
1507    output = array_ops.identity(dw)
1508
1509    with session.Session(config=_get_config(False)) as sess:
1510      output_val_ref = sess.run(output)
1511
1512    with session.Session(config=_get_config()) as sess:
1513      metadata = config_pb2.RunMetadata()
1514      output_val = sess.run(output, run_metadata=metadata)
1515
1516    nodes = []
1517    num_transposes = 0
1518    for node in metadata.cost_graph.node:
1519      if _is_transpose(node.name):
1520        num_transposes += 1
1521      nodes.append(node.name)
1522
1523    expected_num_transposes = 2
1524    self.assertEqual(expected_num_transposes, num_transposes)
1525    self._assert_trans_ndhwc_to_ncdhw('Conv3DBackpropFilterV2-0', nodes)
1526    self._assert_trans_ndhwc_to_ncdhw('Conv3DBackpropFilterV2-2', nodes)
1527    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1528
1529  @test_util.deprecated_graph_mode_only
1530  def testBiasAddFor5DTensor(self):
1531    if not test.is_gpu_available(cuda_only=True):
1532      self.skipTest('GPU required')
1533    random_seed.set_random_seed(0)
1534    x = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0)
1535    w = random_ops.truncated_normal([2, 2, 2, 1, 2], seed=0)
1536    b = random_ops.truncated_normal([2], seed=0)
1537    strides = [1, 1, 1, 1, 1]
1538    y = gen_nn_ops.conv3d(x, w, strides, 'SAME')
1539    y = gen_nn_ops.bias_add(y, b, 'NHWC')
1540    output = array_ops.identity(y)
1541
1542    with session.Session(config=_get_config(False)) as sess:
1543      output_val_ref = sess.run(output)
1544
1545    with session.Session(config=_get_config()) as sess:
1546      metadata = config_pb2.RunMetadata()
1547      output_val = sess.run(output, run_metadata=metadata)
1548
1549    nodes = []
1550    num_transposes = 0
1551    for node in metadata.cost_graph.node:
1552      if _is_transpose(node.name):
1553        num_transposes += 1
1554      nodes.append(node.name)
1555
1556    expected_num_transposes = 2
1557    self.assertEqual(expected_num_transposes, num_transposes)
1558    self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes)
1559    self._assert_trans_ncdhw_to_ndhwc('BiasAdd-0-0', nodes)
1560    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1561
1562  @test_util.deprecated_graph_mode_only
1563  def testBiasAddGradFor5DTensor(self):
1564    if not test.is_gpu_available(cuda_only=True):
1565      self.skipTest('GPU required')
1566    random_seed.set_random_seed(0)
1567    dy = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0)
1568    w = random_ops.truncated_normal([2, 2, 2, 1, 1], seed=0)
1569    strides = [1, 1, 1, 1, 1]
1570    dy_shape = array_ops.shape(dy)
1571    dx = gen_nn_ops.conv3d_backprop_input_v2(dy_shape, w, dy, strides, 'SAME')
1572    db = gen_nn_ops.bias_add_grad(dx, 'NHWC')
1573    output = array_ops.identity(db)
1574
1575    with session.Session(config=_get_config(False)) as sess:
1576      output_val_ref = sess.run(output)
1577
1578    with session.Session(config=_get_config()) as sess:
1579      metadata = config_pb2.RunMetadata()
1580      output_val = sess.run(output, run_metadata=metadata)
1581
1582    nodes = []
1583    num_transposes = 0
1584    for node in metadata.cost_graph.node:
1585      if _is_transpose(node.name):
1586        num_transposes += 1
1587      nodes.append(node.name)
1588
1589    # The output of Conv3DBackpropInputV2 won't be converted back to NDHWC
1590    # because of the BiasAddGrad.
1591    expected_num_transposes = 1
1592    self.assertEqual(expected_num_transposes, num_transposes)
1593    self._assert_vec_ndhwc_to_ncdhw('Conv3DBackpropInputV2-0', nodes)
1594    self._assert_trans_ndhwc_to_ncdhw('Conv3DBackpropInputV2-2', nodes)
1595    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1596
1597  @test_util.deprecated_graph_mode_only
1598  def testSliceWithNonConstAxis(self):
1599    if test.is_gpu_available(cuda_only=True):
1600      random_seed.set_random_seed(0)
1601      x = random_ops.truncated_normal([1, 784], seed=0)
1602      conv = _two_layer_model(x)
1603      size = array_ops.placeholder(dtype='int32')
1604      s = array_ops.slice(conv, [0, 0, 0, 0], size)
1605      output = array_ops.identity(s)
1606
1607      size_val = [1, 2, 3, 4]
1608      with session.Session(config=_get_config(False)) as sess:
1609        output_val_ref = sess.run(output, feed_dict={size: size_val})
1610
1611      with session.Session(config=_get_config()) as sess:
1612        metadata = config_pb2.RunMetadata()
1613        output_val = sess.run(
1614            output, run_metadata=metadata, feed_dict={
1615                size: size_val
1616            })
1617
1618      nodes = []
1619      num_transposes = 0
1620      for node in metadata.cost_graph.node:
1621        if _is_transpose(node.name):
1622          num_transposes += 1
1623        nodes.append(node.name)
1624
1625      # Four transposes were initially added in the Expand phase of
1626      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1627      expected_num_transposes = 2
1628      self.assertEqual(expected_num_transposes, num_transposes)
1629      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1630      self._assert_trans_nchw_to_nhwc('Slice-0-0', nodes)
1631      self._assert_vec_nhwc_to_nchw('Slice-2', nodes)
1632      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1633
1634  @test_util.deprecated_graph_mode_only
1635  def testSliceWithNonConstAxisFor5DTensor(self):
1636    if not test.is_gpu_available(cuda_only=True):
1637      self.skipTest('GPU required')
1638    random_seed.set_random_seed(0)
1639    x = random_ops.truncated_normal([2, 2, 14, 14, 1], seed=0)
1640    w = random_ops.truncated_normal([2, 2, 2, 1, 2], seed=0)
1641    strides = [1, 1, 1, 1, 1]
1642    y = gen_nn_ops.conv3d(x, w, strides, 'SAME')
1643    size = array_ops.placeholder(dtype='int32')
1644    s = array_ops.slice(y, [0, 0, 0, 0, 0], size)
1645    output = array_ops.identity(s)
1646
1647    size_val = [1, 1, 2, 2, 1]
1648    with session.Session(config=_get_config(False)) as sess:
1649      output_val_ref = sess.run(output, feed_dict={size: size_val})
1650
1651    with session.Session(config=_get_config()) as sess:
1652      metadata = config_pb2.RunMetadata()
1653      output_val = sess.run(
1654          output, run_metadata=metadata, feed_dict={size: size_val})
1655
1656    nodes = []
1657    num_transposes = 0
1658    for node in metadata.cost_graph.node:
1659      if _is_transpose(node.name):
1660        num_transposes += 1
1661      nodes.append(node.name)
1662
1663    # Four transposes were initially added in the Expand phase of
1664    # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1665    expected_num_transposes = 2
1666    self.assertEqual(expected_num_transposes, num_transposes)
1667    self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes)
1668    self._assert_trans_ncdhw_to_ndhwc('Slice-0-0', nodes)
1669    self._assert_vec_ndhwc_to_ncdhw('Slice-2', nodes)
1670    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1671
1672  @test_util.deprecated_graph_mode_only
1673  def testStridedSliceWithNonConstAxis(self):
1674    if test.is_gpu_available(cuda_only=True):
1675      random_seed.set_random_seed(0)
1676      x = random_ops.truncated_normal([1, 784], seed=0)
1677      conv = _two_layer_model(x)
1678      end = array_ops.placeholder(dtype='int32')
1679      s = array_ops.strided_slice(conv, [0, 0, 0, 0], end, strides=[1, 2, 3, 1])
1680      output = array_ops.identity(s)
1681
1682      end_val = [1, 2, 3, 4]
1683      with session.Session(config=_get_config(False)) as sess:
1684        output_val_ref = sess.run(output, feed_dict={end: end_val})
1685
1686      with session.Session(config=_get_config()) as sess:
1687        metadata = config_pb2.RunMetadata()
1688        output_val = sess.run(
1689            output, run_metadata=metadata, feed_dict={
1690                end: end_val
1691            })
1692
1693      nodes = []
1694      num_transposes = 0
1695      for node in metadata.cost_graph.node:
1696        if _is_transpose(node.name):
1697          num_transposes += 1
1698        nodes.append(node.name)
1699
1700      # Four transposes were initially added in the Expand phase of
1701      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1702      expected_num_transposes = 2
1703      self.assertEqual(expected_num_transposes, num_transposes)
1704      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1705      self._assert_trans_nchw_to_nhwc('StridedSlice-0-0', nodes)
1706      self._assert_vec_nhwc_to_nchw('StridedSlice-2', nodes)
1707      self.assertIn('StridedSlice-1-LayoutOptimizer', nodes)
1708      self.assertIn('StridedSlice-3-LayoutOptimizer', nodes)
1709      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1710
1711  @test_util.deprecated_graph_mode_only
1712  def testStridedSliceWithMask1011(self):
1713    if test.is_gpu_available(cuda_only=True):
1714      random_seed.set_random_seed(0)
1715      x = random_ops.truncated_normal([1, 784], seed=0)
1716      conv = _two_layer_model(x)
1717      # This will generate a StridedSlice op with begin mask and
1718      # end mask 11(1011).
1719      s = conv[:, :, 1:-1, :]
1720      output = array_ops.identity(s)
1721
1722      with session.Session(config=_get_config(False)) as sess:
1723        output_val_ref = self.evaluate(output)
1724
1725      with session.Session(config=_get_config()) as sess:
1726        metadata = config_pb2.RunMetadata()
1727        output_val = sess.run(output, run_metadata=metadata)
1728
1729      nodes = []
1730      num_transposes = 0
1731      for node in metadata.cost_graph.node:
1732        if _is_transpose(node.name):
1733          num_transposes += 1
1734        nodes.append(node.name)
1735
1736      # Four transposes were initially added in the Expand phase of
1737      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1738      expected_num_transposes = 2
1739      self.assertEqual(expected_num_transposes, num_transposes)
1740      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1741      self._assert_trans_nchw_to_nhwc('strided_slice-0-0', nodes)
1742      self.assertIn('strided_slice-1-LayoutOptimizer', nodes)
1743      self.assertIn('strided_slice-2-LayoutOptimizer', nodes)
1744      self.assertIn('strided_slice-3-LayoutOptimizer', nodes)
1745      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1746
1747  @test_util.deprecated_graph_mode_only
1748  def testStridedSliceWithMask0111(self):
1749    if test.is_gpu_available(cuda_only=True):
1750      random_seed.set_random_seed(0)
1751      x = random_ops.truncated_normal([1, 784], seed=0)
1752      conv = _two_layer_model(x)
1753      # This will generate a StridedSlice op with begin mask and
1754      # end mask 7(0111).
1755      s = conv[:, :, :, 1:-1]
1756      output = array_ops.identity(s)
1757
1758      with session.Session(config=_get_config(False)) as sess:
1759        output_val_ref = self.evaluate(output)
1760
1761      with session.Session(config=_get_config()) as sess:
1762        metadata = config_pb2.RunMetadata()
1763        output_val = sess.run(output, run_metadata=metadata)
1764
1765      nodes = []
1766      num_transposes = 0
1767      for node in metadata.cost_graph.node:
1768        if _is_transpose(node.name):
1769          num_transposes += 1
1770        nodes.append(node.name)
1771
1772      # Four transposes were initially added in the Expand phase of
1773      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1774      expected_num_transposes = 2
1775      self.assertEqual(expected_num_transposes, num_transposes)
1776      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1777      self._assert_trans_nchw_to_nhwc('strided_slice-0-0', nodes)
1778      self.assertIn('strided_slice-1-LayoutOptimizer', nodes)
1779      self.assertIn('strided_slice-2-LayoutOptimizer', nodes)
1780      self.assertIn('strided_slice-3-LayoutOptimizer', nodes)
1781      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1782
1783  @test_util.deprecated_graph_mode_only
1784  def testStridedSliceGradWithNonConstAxis(self):
1785    if test.is_gpu_available(cuda_only=True):
1786      random_seed.set_random_seed(0)
1787      x = random_ops.truncated_normal([1, 784], seed=0)
1788      conv = _two_layer_model(x)
1789      end = array_ops.placeholder(dtype='int32')
1790      shape = array_ops.shape(conv)
1791      end_val = [1, 2, 3, 4]
1792      s = array_ops.strided_slice(
1793          conv, [0, 0, 0, 0], end_val, strides=[1, 2, 3, 1])
1794      s_grad = array_ops.strided_slice_grad(shape, [0, 0, 0, 0], end,
1795                                            [1, 2, 3, 1], s)
1796      output = array_ops.identity(s_grad)
1797
1798      with session.Session(config=_get_config(False)) as sess:
1799        output_val_ref = sess.run(output, feed_dict={end: end_val})
1800
1801      with session.Session(config=_get_config()) as sess:
1802        metadata = config_pb2.RunMetadata()
1803        output_val = sess.run(
1804            output, run_metadata=metadata, feed_dict={
1805                end: end_val
1806            })
1807
1808      nodes = []
1809      num_transposes = 0
1810      for node in metadata.cost_graph.node:
1811        if _is_transpose(node.name):
1812          num_transposes += 1
1813        nodes.append(node.name)
1814
1815      # Four transposes were initially added in the Expand phase of
1816      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1817      expected_num_transposes = 2
1818      self.assertEqual(expected_num_transposes, num_transposes)
1819      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1820      self._assert_trans_nchw_to_nhwc('StridedSliceGrad-0-0', nodes)
1821      self._assert_vec_nhwc_to_nchw('StridedSliceGrad-2', nodes)
1822      self.assertIn('StridedSlice-1-LayoutOptimizer', nodes)
1823      self.assertIn('StridedSlice-2-LayoutOptimizer', nodes)
1824      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1825
1826  @test_util.deprecated_graph_mode_only
1827  def testShapeN(self):
1828    if test.is_gpu_available(cuda_only=True):
1829      x = array_ops.placeholder(dtype='float32')
1830      conv = _two_layer_model(x)
1831      shapen = array_ops.shape_n([conv, conv])
1832      output = math_ops.add(shapen[0], shapen[1])
1833
1834      x_val = [1.7] * 784
1835      with session.Session(config=_get_config(False)) as sess:
1836        output_val_ref = sess.run(output, feed_dict={x: x_val})
1837
1838      with session.Session(config=_get_config()) as sess:
1839        metadata = config_pb2.RunMetadata()
1840        output_val = sess.run(
1841            output, run_metadata=metadata, feed_dict={
1842                x: x_val
1843            })
1844
1845      nodes = []
1846      num_transposes = 0
1847      for node in metadata.cost_graph.node:
1848        if _is_transpose(node.name):
1849          num_transposes += 1
1850        nodes.append(node.name)
1851
1852      expected_num_transposes = 1
1853      self.assertEqual(expected_num_transposes, num_transposes)
1854      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1855      self._assert_vec_nchw_to_nhwc('ShapeN-0-0', nodes)
1856      self.assertAllEqual(output_val_ref, output_val)
1857
1858  @test_util.deprecated_graph_mode_only
1859  def testShapeNFor5DTensor(self):
1860    if not test.is_gpu_available(cuda_only=True):
1861      self.skipTest('GPU required')
1862    h = array_ops.placeholder(dtype='float32')
1863    x = array_ops.reshape(h, [-1, 2, 14, 14, 1])
1864    w = random_ops.truncated_normal([2, 2, 2, 1, 2], seed=0)
1865    strides = [1, 1, 1, 1, 1]
1866    y = gen_nn_ops.conv3d(x, w, strides, 'SAME')
1867    shapen = array_ops.shape_n([y, y])
1868    output = math_ops.add(shapen[0], shapen[1])
1869
1870    x_val = [1.7] * 784
1871    with session.Session(config=_get_config(False)) as sess:
1872      output_val_ref = sess.run(output, feed_dict={h: x_val})
1873
1874    with session.Session(config=_get_config()) as sess:
1875      metadata = config_pb2.RunMetadata()
1876      output_val = sess.run(output, run_metadata=metadata, feed_dict={h: x_val})
1877
1878    nodes = []
1879    num_transposes = 0
1880    for node in metadata.cost_graph.node:
1881      if _is_transpose(node.name):
1882        num_transposes += 1
1883      nodes.append(node.name)
1884
1885    expected_num_transposes = 1
1886    self.assertEqual(expected_num_transposes, num_transposes)
1887    self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes)
1888    self._assert_vec_ncdhw_to_ndhwc('ShapeN-0-0', nodes)
1889    self._assert_vec_ncdhw_to_ndhwc('ShapeN-1-0', nodes)
1890    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1891
1892  @test_util.deprecated_graph_mode_only
1893  def testIdentityNFor4DAnd5DTensors(self):
1894    if not test.is_gpu_available(cuda_only=True):
1895      self.skipTest('GPU required')
1896    h = array_ops.placeholder(dtype='float32')
1897    x = array_ops.reshape(h, [-1, 2, 14, 14, 1])
1898    w = random_ops.truncated_normal([2, 2, 2, 1, 4], seed=0)
1899    strides = [1, 1, 1, 1, 1]
1900    y = gen_nn_ops.conv3d(x, w, strides, 'SAME')
1901    x1 = array_ops.reshape(h, [-1, 784])
1902    y1 = _two_layer_model(x1)
1903    outputs = array_ops.identity_n([y1, y])
1904    new_x0 = array_ops.reshape(outputs[0], [-1, 2, 14, 14, 1])
1905    new_x1 = array_ops.reshape(outputs[1], [-1, 2, 14, 14, 1])
1906    output = math_ops.add(new_x0, new_x1)
1907
1908    x_val = [1.7] * 784
1909    with session.Session(config=_get_config(False)) as sess:
1910      output_val_ref = sess.run(output, feed_dict={h: x_val})
1911
1912    with session.Session(config=_get_config()) as sess:
1913      metadata = config_pb2.RunMetadata()
1914      output_val = sess.run(output, run_metadata=metadata, feed_dict={h: x_val})
1915
1916    nodes = []
1917    num_transposes = 0
1918    for node in metadata.cost_graph.node:
1919      if _is_transpose(node.name):
1920        num_transposes += 1
1921      nodes.append(node.name)
1922
1923    expected_num_transposes = 4
1924    self.assertEqual(expected_num_transposes, num_transposes)
1925    self._assert_trans_ndhwc_to_ncdhw('Conv3D-0', nodes)
1926    self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1927    self._assert_trans_ncdhw_to_ndhwc('IdentityN-1-0', nodes)
1928    self._assert_trans_nchw_to_nhwc('IdentityN-0-0', nodes)
1929    self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1930
1931  @test_util.deprecated_graph_mode_only
1932  def testShapeNFollowedByNotConvertibleNodeReshape(self):
1933    if test.is_gpu_available(cuda_only=True):
1934      x = array_ops.placeholder(dtype='float32')
1935      conv = _two_layer_model(x)
1936      conv_reshape = array_ops.reshape(conv, [1, 1, 1, -1])
1937      shapen = array_ops.shape_n([conv, conv_reshape])
1938      shape = array_ops.identity(shapen[1])
1939      ones = array_ops.ones(shape)
1940      output = math_ops.add_n([conv_reshape, ones])
1941
1942      x_val = [1.7] * 784
1943      with session.Session(config=_get_config(False)) as sess:
1944        output_val_ref = sess.run(output, feed_dict={x: x_val})
1945
1946      with session.Session(config=_get_config()) as sess:
1947        metadata = config_pb2.RunMetadata()
1948        output_val = sess.run(
1949            output, run_metadata=metadata, feed_dict={x: x_val})
1950
1951      nodes = []
1952      num_transposes = 0
1953      for node in metadata.cost_graph.node:
1954        if _is_transpose(node.name):
1955          num_transposes += 1
1956        nodes.append(node.name)
1957
1958      expected_num_transposes = 2
1959      self.assertEqual(expected_num_transposes, num_transposes)
1960      self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes)
1961      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1962
1963  @test_util.deprecated_graph_mode_only
1964  def testLoop(self):
1965    if test.is_gpu_available(cuda_only=True):
1966      output = _loop()
1967
1968      with session.Session(config=_get_config(False)) as sess:
1969        output_val_ref = self.evaluate(output)
1970
1971      with session.Session(config=_get_config()) as sess:
1972        metadata = config_pb2.RunMetadata()
1973        output_val = sess.run(output, run_metadata=metadata)
1974
1975      nodes = []
1976      num_transposes = 0
1977      for node in metadata.cost_graph.node:
1978        if _is_transpose(node.name):
1979          num_transposes += 1
1980        nodes.append(node.name)
1981
1982      # Four transposes were initially added in the Expand phase of
1983      # LayoutOptimizer; two of them are cancelled out in the Collapse phase.
1984      expected_num_transposes = 2
1985      self.assertEqual(expected_num_transposes, num_transposes)
1986      self.assertEqual(expected_num_transposes, num_transposes)
1987      self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
1988      self._assert_trans_nchw_to_nhwc('map/while/MaxPool_1-0-2', nodes)
1989      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
1990
1991  @test_util.deprecated_graph_mode_only
1992  def testLoopWithBranch(self):
1993    if test.is_gpu_available(cuda_only=True):
1994      output = _loop_with_branch()
1995
1996      with session.Session(config=_get_config(False)) as sess:
1997        output_val_ref = self.evaluate(output)
1998
1999      with session.Session(config=_get_config()) as sess:
2000        metadata = config_pb2.RunMetadata()
2001        output_val = sess.run(output, run_metadata=metadata)
2002
2003      nodes = []
2004      num_transposes = 0
2005      for node in metadata.cost_graph.node:
2006        if _is_transpose(node.name):
2007          num_transposes += 1
2008        nodes.append(node.name)
2009
2010      expected_num_transposes = 3
2011      self.assertEqual(expected_num_transposes, num_transposes)
2012      self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
2013      self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes)
2014      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
2015
2016  @test_util.deprecated_graph_mode_only
2017  def testLoopWithVecAnd4D(self):
2018    if test.is_gpu_available(cuda_only=True):
2019      output = _loop_with_vec_and_4d()
2020
2021      with session.Session(config=_get_config(False)) as sess:
2022        output_val_ref = self.evaluate(output)
2023
2024      with session.Session(config=_get_config()) as sess:
2025        metadata = config_pb2.RunMetadata()
2026        output_val = sess.run(output, run_metadata=metadata)
2027
2028      nodes = []
2029      num_transposes = 0
2030      for node in metadata.cost_graph.node:
2031        if _is_transpose(node.name):
2032          num_transposes += 1
2033        nodes.append(node.name)
2034
2035      expected_num_transposes = 2
2036      self.assertEqual(expected_num_transposes, num_transposes)
2037      self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes)
2038      self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes)
2039      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
2040
2041  @test_util.deprecated_graph_mode_only
2042  def testBinaryOpSecondPort(self):
2043    if test.is_gpu_available(cuda_only=True):
2044      output = _model_with_second_port()
2045
2046      with session.Session(config=_get_config(False)) as sess:
2047        output_val_ref = self.evaluate(output)
2048
2049      with session.Session(config=_get_config()) as sess:
2050        metadata = config_pb2.RunMetadata()
2051        output_val = sess.run(output, run_metadata=metadata)
2052
2053      nodes = []
2054      num_transposes = 0
2055      for node in metadata.cost_graph.node:
2056        if _is_transpose(node.name):
2057          num_transposes += 1
2058        nodes.append(node.name)
2059
2060      expected_num_transposes = 2
2061      self.assertEqual(expected_num_transposes, num_transposes)
2062      self._assert_trans_nhwc_to_nchw('FusedBatchNormV3-0', nodes)
2063      self._assert_trans_nchw_to_nhwc('Add-0-0', nodes)
2064      self.assertAllClose(output_val_ref, output_val, atol=1e-3)
2065
2066  @test_util.deprecated_graph_mode_only
2067  def testGradient(self):
2068    meta_graph = _simple_metagraph()
2069    config = config_pb2.ConfigProto()
2070    config.graph_options.rewrite_options.CopyFrom(
2071        rewriter_config_pb2.RewriterConfig(
2072            layout_optimizer=rewriter_config_pb2.RewriterConfig.ON,
2073            min_graph_nodes=-1))
2074    optimized_graph = tf_optimizer.OptimizeGraph(
2075        config, meta_graph, cluster=_get_cluster())
2076
2077    found = 0
2078    for node in optimized_graph.node:
2079      if node.op in ['Conv2D', 'Conv2DBackpropFilter', 'Conv2DBackpropInput']:
2080        found += 1
2081        self.assertEqual(node.attr['data_format'].s, b'NCHW')
2082    self.assertEqual(found, 5)
2083
2084  @test_util.deprecated_graph_mode_only
2085  def testDepthwise(self):
2086    meta_graph = _simple_metagraph(depthwise=True)
2087    config = config_pb2.ConfigProto()
2088    config.graph_options.rewrite_options.CopyFrom(
2089        rewriter_config_pb2.RewriterConfig(
2090            layout_optimizer=rewriter_config_pb2.RewriterConfig.ON,
2091            min_graph_nodes=-1))
2092    optimized_graph = tf_optimizer.OptimizeGraph(
2093        config, meta_graph, cluster=_get_cluster())
2094
2095    found = 0
2096    for node in optimized_graph.node:
2097      if node.op in [
2098          'DepthwiseConv2dNative', 'DepthwiseConv2dNativeBackpropFilter',
2099          'DepthwiseConv2dNativeBackpropInput'
2100      ]:
2101        found += 1
2102        self.assertEqual(node.attr['data_format'].s, b'NCHW')
2103    self.assertEqual(found, 6)
2104
2105  def testCheckpointCompatibility(self):
2106    if not test.is_gpu_available(cuda_only=True):
2107      self.skipTest('GPU required')
2108
2109    checkpoint_path = self.get_temp_dir()
2110    self._train(checkpoint_path)
2111    vars_expected = self._train(checkpoint_path, restore=True)
2112    vars_layout_optimized = self._train(
2113        checkpoint_path, restore=True, layout_optimizer=True)
2114
2115    for var_expected, var_layout_optimized in zip(vars_expected,
2116                                                  vars_layout_optimized):
2117      self.assertAllClose(var_expected, var_layout_optimized, atol=1e-6)
2118
2119
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
  test.main()
2122