xref: /aosp_15_r20/external/tensorflow/tensorflow/python/profiler/model_analyzer_test.py (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15
16import gzip
17import io
18import os
19import random
20import re
21
22import numpy as np
23
24from tensorflow.core.profiler import profile_pb2
25from tensorflow.core.protobuf import config_pb2
26from tensorflow.core.protobuf import rewriter_config_pb2
27from tensorflow.python.client import session
28from tensorflow.python.framework import dtypes
29from tensorflow.python.framework import ops
30from tensorflow.python.framework import test_util
31from tensorflow.python.ops import array_ops
32from tensorflow.python.ops import control_flow_ops
33from tensorflow.python.ops import gradients
34from tensorflow.python.ops import random_ops
35from tensorflow.python.ops import variables
36from tensorflow.python.platform import gfile
37from tensorflow.python.platform import test
38from tensorflow.python.profiler import model_analyzer
39from tensorflow.python.profiler import option_builder
40from tensorflow.python.profiler import profile_context
41from tensorflow.python.profiler.internal import model_analyzer_testlib as lib
42from tensorflow.python.util import compat
43
# Module-level alias for the profile option builder class; the tests below
# instantiate it as `builder(...)` and call its static presets on it.
44builder = option_builder.ProfileOptionBuilder
45
46
47class PrintModelAnalysisTest(test.TestCase):
48
49  def _no_rewrite_session_config(self):
50    rewriter_config = rewriter_config_pb2.RewriterConfig(
51        pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.OFF)
52    graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
53    return config_pb2.ConfigProto(graph_options=graph_options)
54
55  def testDumpToFile(self):
56    ops.reset_default_graph()
57    outfile = os.path.join(test.get_temp_dir(), 'dump')
58    opts = builder(builder.trainable_variables_parameter()).with_file_output(
59        outfile).build()
60
61    with session.Session(config=self._no_rewrite_session_config()) as sess:
62      _ = lib.BuildSmallModel()
63      model_analyzer.profile(sess.graph, options=opts)
64
65      with gfile.Open(outfile, 'r') as f:
66        self.assertEqual(
67            u'node name | # parameters\n'
68            '_TFProfRoot (--/451 params)\n'
69            '  DW (3x3x3x6, 162/162 params)\n'
70            '  DW2 (2x2x6x12, 288/288 params)\n'
71            '  ScalarW (1, 1/1 params)\n', lib.CheckAndRemoveDoc(f.read()))
72
73  @test_util.run_v1_only('b/120545219')
74  def testSelectEverythingDetail(self):
75    ops.reset_default_graph()
76    dev = '/device:GPU:0' if test.is_gpu_available() else '/device:CPU:0'
77    outfile = os.path.join(test.get_temp_dir(), 'dump')
78    opts = (
79        builder(builder.trainable_variables_parameter()).with_file_output(
80            outfile).with_accounted_types(['.*']).select([
81                'micros', 'bytes', 'params', 'float_ops', 'occurrence',
82                'device', 'op_types', 'input_shapes'
83            ]).build())
84
85    with profile_context.ProfileContext(
86        test.get_temp_dir(), trace_steps=[], dump_steps=[]) as pctx:
87      with session.Session(
88          config=self._no_rewrite_session_config()) as sess, ops.device(dev):
89        x = lib.BuildSmallModel()
90
91        self.evaluate(variables.global_variables_initializer())
92        pctx.trace_next_step()
93        pctx.dump_next_step()
94        _ = self.evaluate(x)
95
96        pctx.profiler.profile_name_scope(options=opts)
97
98        with gfile.Open(outfile, 'r') as f:
99          # pylint: disable=line-too-long
100          dump_str = lib.CheckAndRemoveDoc(f.read())
101          outputs = dump_str.split('\n')
102
103          self.assertEqual(
104              outputs[0],
105              'node name | # parameters | # float_ops | requested bytes | total execution time | accelerator execution time | cpu execution time | assigned devices | op types | op count (run|defined) | input shapes'
106          )
107          for o in outputs[1:]:
108            if o.find('Conv2D ') > 0:
109              metrics = o[o.find('(') + 1:o.find(')')].split(',')
110              # Make sure time is profiled.
111              gap = 1 if test.is_gpu_available() else 2
112              for i in range(3, 6, gap):
113                mat = re.search('(.*)(?:us|ms|sec)/(.*)(?:us|ms|sec)',
114                                metrics[i])
115                self.assertGreater(float(mat.group(1)), 0.0)
116                self.assertGreater(float(mat.group(2)), 0.0)
117              # Make sure device is profiled.
118              if test.is_gpu_available():
119                self.assertTrue(metrics[6].find('gpu') > 0)
120                self.assertFalse(metrics[6].find('cpu') > 0)
121              else:
122                self.assertFalse(metrics[6].find('gpu') > 0)
123                self.assertTrue(metrics[6].find('cpu') > 0)
124              # Make sure float_ops is profiled.
125              mat = re.search('(.*)k/(.*)k flops', metrics[1].strip())
126              self.assertGreater(float(mat.group(1)), 0.0)
127              self.assertGreater(float(mat.group(2)), 0.0)
128              # Make sure op_count is profiled.
129              self.assertEqual(metrics[8].strip(), '1/1|1/1')
130              # Make sure input_shapes is profiled.
131              self.assertEqual(metrics[9].strip(), '0:2x6x6x3|1:3x3x3x6')
132
133            if o.find('DW (3x3x3x6') > 0:
134              metrics = o[o.find('(') + 1:o.find(')')].split(',')
135              mat = re.search('(.*)/(.*) params', metrics[1].strip())
136              self.assertGreater(float(mat.group(1)), 0.0)
137              self.assertGreater(float(mat.group(2)), 0.0)
138          # pylint: enable=line-too-long
139
140    # Test that profiler restored from profile file gives the same result.
141    gfile.Remove(outfile)
142    profile_file = os.path.join(test.get_temp_dir(), 'profile_1')
143    with lib.ProfilerFromFile(profile_file) as profiler:
144      profiler.profile_name_scope(options=opts)
145      with gfile.Open(outfile, 'r') as f:
146        self.assertEqual(dump_str, lib.CheckAndRemoveDoc(f.read()))
147
148  def testSelectEverything(self):
149    ops.reset_default_graph()
150    outfile = os.path.join(test.get_temp_dir(), 'dump')
151    opts = (
152        builder(builder.trainable_variables_parameter()).with_file_output(
153            outfile).with_accounted_types(['.*']).select([
154                'params', 'float_ops', 'occurrence', 'device', 'op_types',
155                'input_shapes'
156            ]).build())
157
158    with session.Session(config=self._no_rewrite_session_config()
159                        ) as sess, ops.device('/device:CPU:0'):
160      x = lib.BuildSmallModel()
161
162      self.evaluate(variables.global_variables_initializer())
163      run_meta = config_pb2.RunMetadata()
164      _ = sess.run(
165          x,
166          options=config_pb2.RunOptions(
167              trace_level=config_pb2.RunOptions.FULL_TRACE),
168          run_metadata=run_meta)
169
170      model_analyzer.profile(sess.graph, run_meta, options=opts)
171
172  def testSimpleCodeView(self):
173    ops.reset_default_graph()
174    outfile = os.path.join(test.get_temp_dir(), 'dump')
175    # TODO(xpan): Test 'micros'. Since the execution time changes each run,
176    # it's a bit difficult to test it now.
177    opts = (
178        builder(builder.trainable_variables_parameter()).with_file_output(
179            outfile).with_accounted_types(['.*']).with_node_names(
180                show_name_regexes=['.*model_analyzer_testlib.*'
181                                  ]).account_displayed_op_only(False).select([
182                                      'bytes', 'params', 'float_ops',
183                                      'num_hidden_ops', 'device', 'input_shapes'
184                                  ]).build())
185
186    with session.Session(config=self._no_rewrite_session_config()) as sess:
187      x = lib.BuildSmallModel()
188
189      self.evaluate(variables.global_variables_initializer())
190      run_meta = config_pb2.RunMetadata()
191      _ = sess.run(
192          x,
193          options=config_pb2.RunOptions(
194              trace_level=config_pb2.RunOptions.FULL_TRACE),
195          run_metadata=run_meta)
196
197      model_analyzer.profile(sess.graph, run_meta, cmd='code', options=opts)
198
199      with gfile.Open(outfile, 'r') as f:
200        # pylint: disable=line-too-long
201        self.assertEqual(
202            'node name | requested bytes | # parameters | # float_ops | assigned devices | in',
203            lib.CheckAndRemoveDoc(f.read())[0:80])
204        # pylint: enable=line-too-long
205
206  @test_util.run_v1_only('b/120545219')
207  def testComplexCodeView(self):
208    ops.reset_default_graph()
209    outfile = os.path.join(test.get_temp_dir(), 'dump')
210    opts = (
211        builder(builder.trainable_variables_parameter()).with_file_output(
212            outfile).with_accounted_types(['.*']).with_node_names(
213                show_name_regexes=['.*model_analyzer_testlib.py.*'
214                                  ]).account_displayed_op_only(False).select(
215                                      ['params', 'float_ops']).build())
216
217    with profile_context.ProfileContext(
218        test.get_temp_dir(), trace_steps=[], dump_steps=[]) as pctx:
219      with session.Session(config=self._no_rewrite_session_config()) as sess:
220        x = lib.BuildFullModel()
221
222        self.evaluate(variables.global_variables_initializer())
223        pctx.trace_next_step()
224        _ = self.evaluate(x)
225        tfprof_node = pctx.profiler.profile_python(options=opts)
226
227        # pylint: disable=line-too-long
228        with gfile.Open(outfile, 'r') as f:
229          lines = f.read().split('\n')
230          self.assertGreater(len(lines), 5)
231          result = '\n'.join(l[:min(len(l), 80)] for l in lines)
232          self.assertTrue(
233              compat.as_text(lib.CheckAndRemoveDoc(result)).startswith(
234                  'node name | # parameters | # float_ops'))
235
236        self.assertLess(0, tfprof_node.total_exec_micros)
237        self.assertEqual(2844, tfprof_node.total_parameters)
238        #The graph is modified when MKL is enabled,total_float_ops will
239        #be different
240        if test_util.IsMklEnabled():
241          self.assertLess(101600, tfprof_node.total_float_ops)
242        else:
243          self.assertLess(145660, tfprof_node.total_float_ops)
244        self.assertEqual(8, len(tfprof_node.children))
245        self.assertEqual('_TFProfRoot', tfprof_node.name)
246        self.assertEqual('model_analyzer_testlib.py:63:BuildFullModel',
247                         tfprof_node.children[0].name)
248        self.assertEqual(
249            'model_analyzer_testlib.py:63:BuildFullModel (gradient)',
250            tfprof_node.children[1].name)
251        self.assertEqual('model_analyzer_testlib.py:67:BuildFullModel',
252                         tfprof_node.children[2].name)
253        self.assertEqual(
254            'model_analyzer_testlib.py:67:BuildFullModel (gradient)',
255            tfprof_node.children[3].name)
256        self.assertEqual('model_analyzer_testlib.py:69:BuildFullModel',
257                         tfprof_node.children[4].name)
258        self.assertEqual('model_analyzer_testlib.py:70:BuildFullModel',
259                         tfprof_node.children[5].name)
260        self.assertEqual(
261            'model_analyzer_testlib.py:70:BuildFullModel (gradient)',
262            tfprof_node.children[6].name)
263        self.assertEqual('model_analyzer_testlib.py:72:BuildFullModel',
264                         tfprof_node.children[7].name)
265        # pylint: enable=line-too-long
266
267  def testCodeViewLeafGraphNode(self):
268    ops.reset_default_graph()
269    opts = (
270        builder(builder.trainable_variables_parameter()).with_empty_output()
271        .with_accounted_types(['.*']).account_displayed_op_only(False).select(
272            ['bytes', 'params', 'float_ops', 'device']).build())
273
274    with session.Session(config=self._no_rewrite_session_config()) as sess:
275      x = lib.BuildSmallModel()
276
277      self.evaluate(variables.global_variables_initializer())
278      run_meta = config_pb2.RunMetadata()
279      _ = sess.run(
280          x,
281          options=config_pb2.RunOptions(
282              trace_level=config_pb2.RunOptions.FULL_TRACE),
283          run_metadata=run_meta)
284
285      tfprof_node = model_analyzer.profile(
286          sess.graph, run_meta, cmd='code', options=opts)
287
288      leaf = tfprof_node
289      while leaf.children:
290        self.assertEqual(0, len(leaf.graph_nodes))
291        leaf = leaf.children[0]
292      self.assertEqual(1, len(leaf.graph_nodes))
293
294  def testTimeline(self):
295    ops.reset_default_graph()
296    outfile = os.path.join(test.get_temp_dir(), 'timeline')
297    opts = (
298        builder(builder.trainable_variables_parameter()).with_max_depth(100000)
299        .with_step(0).with_timeline_output(outfile).with_accounted_types(
300            ['.*']).build())
301
302    with session.Session(config=self._no_rewrite_session_config()) as sess:
303      x = lib.BuildFullModel()
304
305      self.evaluate(variables.global_variables_initializer())
306      run_meta = config_pb2.RunMetadata()
307      _ = sess.run(
308          x,
309          options=config_pb2.RunOptions(
310              trace_level=config_pb2.RunOptions.FULL_TRACE),
311          run_metadata=run_meta)
312
313      _ = model_analyzer.profile(
314          sess.graph, run_meta, cmd='graph', options=opts)
315
316      with gfile.Open(outfile + '_0', 'r') as f:
317        # Test that a json file is created.
318        # TODO(xpan): tfprof Timeline isn't quite correct on Windows.
319        # Investigate why.
320        if os.name != 'nt':
321          self.assertLess(1000, len(f.read()))
322        else:
323          self.assertLess(1, len(f.read()))
324
325  def testOpView(self):
326    ops.reset_default_graph()
327    outfile = os.path.join(test.get_temp_dir(), 'dump')
328
329    opts = (
330        builder(builder.trainable_variables_parameter()).with_file_output(
331            outfile).with_accounted_types(
332                ['.*']).with_min_occurrence(10).order_by('occurrence').select([
333                    'params', 'micros', 'bytes', 'peak_bytes', 'residual_bytes',
334                    'output_bytes', 'occurrence', 'input_shapes'
335                ]).build())
336
337    with session.Session(config=self._no_rewrite_session_config()) as sess:
338      x = lib.BuildFullModel()
339
340      self.evaluate(variables.global_variables_initializer())
341      run_meta = config_pb2.RunMetadata()
342      _ = sess.run(
343          x,
344          options=config_pb2.RunOptions(
345              trace_level=config_pb2.RunOptions.FULL_TRACE),
346          run_metadata=run_meta)
347
348      tfprof_node = model_analyzer.profile(
349          sess.graph, run_meta, cmd='op', options=opts)
350
351      with gfile.Open(outfile, 'r') as f:
352        # pylint: disable=line-too-long
353        self.assertEqual(
354            'nodename|requestedbytes|peakbytes|residualbytes|outputbytes|totalexecutiontime|acceleratorexecutiontime|cpuexecutiontime|#parameters|opoccurrence(run|defined)|inputshapes',
355            lib.CheckAndRemoveDoc(f.read()).replace('\t',
356                                                    '').replace(' ', '')[0:170])
357        # pylint: enable=line-too-long
358
359      total_children = 0
360      last_occurrence = 1e32
361      input_shapes = 0
362      last_total_micros = tfprof_node.total_exec_micros
363      last_micros = tfprof_node.exec_micros
364      while tfprof_node.children:
365        for gnode in tfprof_node.graph_nodes:
366          input_shapes += len(gnode.input_shapes)
367        self.assertEqual(len(tfprof_node.children), 1)
368        tfprof_node = tfprof_node.children[0]
369
370        self.assertEqual(last_total_micros,
371                         tfprof_node.total_exec_micros + last_micros)
372        last_total_micros = tfprof_node.total_exec_micros
373        last_micros = tfprof_node.exec_micros
374
375        total_children += 1
376        self.assertLessEqual(len(tfprof_node.graph_nodes), last_occurrence)
377        last_occurrence = len(tfprof_node.graph_nodes)
378
379      self.assertGreater(input_shapes, 0)
380
381  def testAdvisor(self):
382    ops.reset_default_graph()
383
384    with session.Session(config=self._no_rewrite_session_config()) as sess:
385      x = lib.BuildFullModel()
386
387      self.evaluate(variables.global_variables_initializer())
388      run_meta = config_pb2.RunMetadata()
389      _ = sess.run(
390          x,
391          options=config_pb2.RunOptions(
392              trace_level=config_pb2.RunOptions.FULL_TRACE),
393          run_metadata=run_meta)
394
395      advice_pb = model_analyzer.advise(sess.graph, run_meta)
396      self.assertTrue('AcceleratorUtilizationChecker' in advice_pb.checkers)
397      self.assertTrue('ExpensiveOperationChecker' in advice_pb.checkers)
398      self.assertTrue('OperationChecker' in advice_pb.checkers)
399
400      checker = advice_pb.checkers['AcceleratorUtilizationChecker']
401      if test.is_gpu_available():
402        self.assertGreater(len(checker.reports), 0)
403      else:
404        self.assertEqual(len(checker.reports), 0)
405      checker = advice_pb.checkers['ExpensiveOperationChecker']
406      self.assertGreater(len(checker.reports), 0)
407
408  def pprof_test_helper(self, attribute, should_fail=False):
409    ops.reset_default_graph()
410    outfile = os.path.join(test.get_temp_dir(), attribute + '_pprof.pb.gz')
411    opts = (
412        builder(builder.time_and_memory()).select([
413            attribute
414        ]).with_max_depth(100000).with_node_names(
415            trim_name_regexes=['ops.py.*']).with_pprof_output(outfile).build())
416
417    with session.Session(config=self._no_rewrite_session_config()) as sess:
418      x = lib.BuildFullModel()
419
420      self.evaluate(variables.global_variables_initializer())
421      run_meta = config_pb2.RunMetadata()
422      _ = sess.run(
423          x,
424          options=config_pb2.RunOptions(
425              trace_level=config_pb2.RunOptions.FULL_TRACE),
426          run_metadata=run_meta)
427
428      _ = model_analyzer.profile(sess.graph, run_meta, cmd='code', options=opts)
429
430      if should_fail:
431        self.assertFalse(gfile.Exists(outfile))
432        return
433
434      profile_pb = profile_pb2.Profile()
435      with gfile.Open(outfile, 'rb') as f:
436        with gzip.GzipFile(fileobj=io.BytesIO(f.read())) as gzipf:
437          profile_pb.ParseFromString(gzipf.read())
438
439      self.assertGreater(len(profile_pb.sample), 10)
440      self.assertGreater(len(profile_pb.location), 10)
441      self.assertGreater(len(profile_pb.function), 10)
442      self.assertGreater(len(profile_pb.string_table), 30)
443
444      has_rnn = False
445      for s in profile_pb.string_table:
446        if s.find('rnn') > 0:
447          has_rnn = True
448        self.assertFalse(s.startswith('ops.py'))
449      self.assertTrue(has_rnn)
450
451  def testPprof(self):
452    for attr in [
453        'micros', 'bytes', 'accelerator_micros', 'cpu_micros', 'params',
454        'float_ops'
455    ]:
456      self.pprof_test_helper(attr)
457    for attr in ['op_types', 'device', 'input_shapes']:
458      self.pprof_test_helper(attr, True)
459
460  def testMinOption(self):
461    ops.reset_default_graph()
462
463    def check_min(nodes, mm=0, mam=0, mcm=0, mb=0, mpb=0, mrb=0, mob=0):
464      for n in nodes:
465        if mm > 0:
466          self.assertGreaterEqual(n.exec_micros, mm)
467        if mam > 0:
468          self.assertGreaterEqual(n.accelerator_exec_micros, mam)
469        if mcm > 0:
470          self.assertGreaterEqual(n.cpu_exec_micros, mcm)
471        if mb > 0:
472          self.assertGreaterEqual(n.requested_bytes, mb)
473        if mpb > 0:
474          self.assertGreaterEqual(n.peak_bytes, mpb)
475        if mrb > 0:
476          self.assertGreaterEqual(n.residual_bytes, mrb)
477        if mob > 0:
478          self.assertGreaterEqual(n.output_bytes, mob)
479        check_min(n.children, mm, mam, mcm, mb, mpb, mrb, mob)
480
481    with session.Session(config=self._no_rewrite_session_config()) as sess:
482      x = lib.BuildSmallModel()
483      self.evaluate(variables.global_variables_initializer())
484      run_meta = config_pb2.RunMetadata()
485      _ = sess.run(
486          x,
487          options=config_pb2.RunOptions(
488              trace_level=config_pb2.RunOptions.FULL_TRACE),
489          run_metadata=run_meta)
490
491      min_val = random.randint(0, 10000)
492
493      opts = builder(builder.time_and_memory(
494          min_micros=min_val)).with_empty_output().build()
495      tfprof_node = model_analyzer.profile(
496          sess.graph, run_meta=run_meta, options=opts)
497      check_min(tfprof_node.children, mm=min_val)
498
499      opts = builder(builder.time_and_memory(
500          min_accelerator_micros=min_val)).with_empty_output().build()
501      tfprof_node = model_analyzer.profile(
502          sess.graph, run_meta=run_meta, options=opts)
503      check_min(tfprof_node.children, mam=min_val)
504
505      opts = builder(builder.time_and_memory(
506          min_cpu_micros=min_val)).with_empty_output().build()
507      tfprof_node = model_analyzer.profile(
508          sess.graph, run_meta=run_meta, options=opts)
509      check_min(tfprof_node.children, mcm=min_val)
510
511      opts = builder(builder.time_and_memory(
512          min_bytes=min_val)).with_empty_output().build()
513      tfprof_node = model_analyzer.profile(
514          sess.graph, run_meta=run_meta, options=opts)
515      check_min(tfprof_node.children, mb=min_val)
516
517      opts = builder(builder.time_and_memory(
518          min_peak_bytes=min_val)).with_empty_output().build()
519      tfprof_node = model_analyzer.profile(
520          sess.graph, run_meta=run_meta, options=opts)
521      check_min(tfprof_node.children, mpb=min_val)
522
523      opts = builder(builder.time_and_memory(
524          min_residual_bytes=min_val)).with_empty_output().build()
525      tfprof_node = model_analyzer.profile(
526          sess.graph, run_meta=run_meta, options=opts)
527      check_min(tfprof_node.children, mrb=min_val)
528
529      opts = builder(builder.time_and_memory(
530          min_output_bytes=min_val)).with_empty_output().build()
531      tfprof_node = model_analyzer.profile(
532          sess.graph, run_meta=run_meta, options=opts)
533      check_min(tfprof_node.children, mob=min_val)
534
535  def testSelectOption(self):
536    ops.reset_default_graph()
537    outfile = os.path.join(test.get_temp_dir(), 'dump')
538
539    def check_selection(selected, not_selected):
540      with gfile.Open(outfile, 'r') as f:
541        s = f.read()
542        for attr in selected:
543          self.assertTrue(s.find(attr) > 0, s)
544        for attr in not_selected:
545          self.assertFalse(s.find(attr) > 0, s)
546
547    with session.Session(config=self._no_rewrite_session_config()) as sess:
548      x = lib.BuildSmallModel()
549      self.evaluate(variables.global_variables_initializer())
550      run_meta = config_pb2.RunMetadata()
551      _ = sess.run(
552          x,
553          options=config_pb2.RunOptions(
554              trace_level=config_pb2.RunOptions.FULL_TRACE),
555          run_metadata=run_meta)
556
557      opts = builder(
558          builder.time_and_memory()).with_file_output(outfile).select(
559              ['micros']).build()
560      _ = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts)
561      check_selection(['total execution time', 'accelerator execution time'],
562                      ['bytes'])
563
564      opts = builder(
565          builder.time_and_memory()).with_file_output(outfile).select(
566              ['bytes']).build()
567      _ = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts)
568      check_selection(['requested bytes'],
569                      ['peak bytes', 'residual bytes', 'output bytes'])
570
571      opts = builder(
572          builder.time_and_memory()).with_file_output(outfile).select(
573              ['peak_bytes', 'residual_bytes', 'output_bytes']).build()
574      _ = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts)
575      check_selection(['peak bytes', 'residual bytes', 'output bytes'],
576                      ['requested_bytes'])
577
578  def _trainLoop(self, train_op, train_steps, time_dir, time_step, memory_dir,
579                 memory_step, profile_dir, dump_step):
580    with session.Session(config=self._no_rewrite_session_config()) as sess:
581      self.evaluate(variables.global_variables_initializer())
582      # start from 1 because variable_initializer took one step.
583      for i in range(1, train_steps + 1):
584        _ = self.evaluate(train_op)
585        if i in time_step:
586          ret = gfile.ListDirectory(time_dir)
587          self.assertEqual(len(ret), 1)
588          self.assertTrue(
589              gfile.Open(os.path.join(time_dir, ret[0]), 'r').read().find(
590                  'execution time') > 0)
591          _ = [gfile.Remove(os.path.join(time_dir, x)) for x in ret]
592        else:
593          self.assertEqual(len(gfile.ListDirectory(time_dir)), 0)
594        if i in memory_step:
595          ret = gfile.ListDirectory(memory_dir)
596          self.assertEqual(len(ret), 1)
597          self.assertTrue(
598              gfile.Open(os.path.join(memory_dir, ret[0]), 'r').read().find(
599                  'requested bytes') > 0)
600          _ = [gfile.Remove(os.path.join(memory_dir, x)) for x in ret]
601        else:
602          self.assertEqual(len(gfile.ListDirectory(memory_dir)), 0)
603        if i in dump_step:
604          ret = gfile.ListDirectory(profile_dir)
605          self.assertAllEqual(ret, ['profile_%d' % i])
606          _ = [gfile.Remove(os.path.join(profile_dir, x)) for x in ret]
607        else:
608          if i < dump_step[0]:
609            self.assertFalse(gfile.Exists(profile_dir))
610          else:
611            self.assertEqual(len(gfile.ListDirectory(profile_dir)), 0)
612
613  @test_util.run_v1_only('b/120545219')
614  def testAutoProfiling(self):
615    ops.reset_default_graph()
616    time_dir = os.path.join(test.get_temp_dir(), 'time')
617    memory_dir = os.path.join(test.get_temp_dir(), 'memory')
618    profile_dir = os.path.join(test.get_temp_dir(), 'dir/dir2/profile')
619    # TODO(xpan): Should we create parent directory for them?
620    gfile.MkDir(time_dir)
621    gfile.MkDir(memory_dir)
622
623    time_opts = (
624        builder(builder.time_and_memory()).with_file_output(
625            os.path.join(time_dir, 'profile')).select(['micros']).build())
626    memory_opts = (
627        builder(builder.time_and_memory()).with_file_output(
628            os.path.join(memory_dir, 'profile')).select(['bytes']).build())
629
630    time_steps = [2, 3]
631    memory_steps = [1, 3]
632    dump_steps = [3, 4]
633
634    x = lib.BuildSmallModel()
635    with profile_context.ProfileContext(
636        profile_dir, trace_steps=[1, 2, 3], dump_steps=[3, 4]) as pctx:
637      pctx.add_auto_profiling('scope', time_opts, time_steps)
638      pctx.add_auto_profiling('scope', memory_opts, memory_steps)
639
640      self._trainLoop(x, 10, time_dir, time_steps, memory_dir, memory_steps,
641                      profile_dir, dump_steps)
642
643  @test_util.run_v1_only('b/120545219')
644  def testOOM(self):
645    if not test.is_gpu_available():
646      return
647    ops.reset_default_graph()
648    with ops.device('/device:GPU:0'):
649      a = random_ops.random_normal([1, 10000, 20000], name='test_random1')
650      b = random_ops.random_normal([30000, 10000, 1], name='test_random2')
651      c = a * b
652
653    try:
654      with session.Session(config=self._no_rewrite_session_config()) as sess:
655        sess.run(
656            c,
657            options=config_pb2.RunOptions(
658                report_tensor_allocations_upon_oom=True))
659    except Exception as e:  # pylint: disable=broad-except
660      exception_str = '%s' % e
661      # This trace reports allocations for to random tensor.
662      self.assertTrue('OOM when allocating tensor with shape[30000,10000,20000]'
663                      in exception_str)
664      mat = re.search('(.*)GiB from test_random2/RandomStandardNormal',
665                      exception_str)
666      self.assertGreater(float(mat.group(1)), 0.0)
667      mat = re.search('(.*)MiB from test_random1/RandomStandardNormal',
668                      exception_str)
669      self.assertGreater(float(mat.group(1)), 0.0)
670
671  @test_util.run_v1_only('b/120545219')
672  def testDistributedOOM(self):
673    if not test.is_gpu_available():
674      return
675    ops.reset_default_graph()
676
677    workers, _ = test_util.create_local_cluster(2, 0)
678
679    with ops.device('/job:worker/replica:0/task:0/gpu:0'):
680      a = random_ops.random_normal([1, 10000, 20000], name='test_random1')
681    with ops.device('/job:worker/replica:0/task:1/gpu:0'):
682      b = random_ops.random_normal([30000, 10000, 1], name='test_random2')
683      c = a * b
684
685    try:
686      with session.Session(workers[1].target) as sess:
687        sess.run(
688            c,
689            options=config_pb2.RunOptions(
690                report_tensor_allocations_upon_oom=True))
691    except Exception as e:  # pylint: disable=broad-except
692      exception_str = '%s' % e
693      # test_random2 is reported because it's allocated in worker 1.
694      self.assertTrue('Current usage from device: '
695                      '/job:worker/replica:0/task:1/device:GPU:0, '
696                      'allocator: GPU_0_bfc' in exception_str)
697      mat = re.search('(.*)GiB from test_random2/RandomStandardNormal',
698                      exception_str)
699      self.assertGreater(float(mat.group(1)), 0.0)
700      # test_random1 is not reported because it's allocated in worker 0.
701      mat = re.search('(.*)MiB from test_random1/RandomStandardNormal',
702                      exception_str)
703      self.assertTrue(mat is None)
704
705  @test_util.run_v1_only('b/120545219')
706  def testTrackPersistentBytes(self):
707    ops.reset_default_graph()
708    a = array_ops.constant(np.ones((100, 100)))
709    b = array_ops.constant(np.ones((100, 100)))
710    c = a * b
711    config = config_pb2.ConfigProto()
712    config.graph_options.rewrite_options.min_graph_nodes = -1
713
714    with session.Session(config=config) as sess:
715      run_options = config_pb2.RunOptions(
716          trace_level=config_pb2.RunOptions.FULL_TRACE)
717      run_metadata = config_pb2.RunMetadata()
718      sess.run(c, options=run_options, run_metadata=run_metadata)
719
720      options = option_builder.ProfileOptionBuilder.time_and_memory()
721      options['min_bytes'] = 0
722      options['select'] = ('bytes', 'peak_bytes', 'output_bytes',
723                           'residual_bytes')
724      ret = model_analyzer.profile(
725          sess.graph, run_meta=run_metadata, cmd='scope', options=options)
726
727      run_metadata = config_pb2.RunMetadata()
728      sess.run(c, options=run_options, run_metadata=run_metadata)
729      ret2 = model_analyzer.profile(
730          sess.graph, run_meta=run_metadata, cmd='scope', options=options)
731
732      n = lib.SearchTFProfNode(ret, 'mul')
733      n2 = lib.SearchTFProfNode(ret2, 'mul')
734      self.assertGreater(n.peak_bytes, 0)
735      self.assertGreater(n.output_bytes, 0)
736      self.assertGreater(n.residual_bytes, 0)
737      self.assertEqual(n.peak_bytes, n2.peak_bytes)
738      self.assertEqual(n.output_bytes, n2.output_bytes)
739      self.assertEqual(n.residual_bytes, n2.residual_bytes)
740
741  @test_util.run_v1_only('b/120545219')
742  def testTraceLoopBytes(self):
743    if not test.is_gpu_available():
744      return
745    ops.reset_default_graph()
746    steps = 100
747
748    with ops.device('/gpu:0'):
749      x = array_ops.ones((100, 100), dtype=dtypes.float32)
750      n = array_ops.constant(steps, dtype=dtypes.int32)
751      x1 = array_ops.ones((100, 100))
752
753      x *= x1
754
755      def loop_body(i, x):
756        x *= x
757        return i + 1, x
758
759      _, y = control_flow_ops.while_loop(lambda i, x: i < n, loop_body,
760                                         [array_ops.constant(0), x])
761
762    grad = gradients.gradients(y, [x1])
763
764    with session.Session(config=self._no_rewrite_session_config()) as sess:
765      run_options = config_pb2.RunOptions(
766          trace_level=config_pb2.RunOptions.FULL_TRACE)
767      run_metadata = config_pb2.RunMetadata()
768      sess.run(grad, options=run_options, run_metadata=run_metadata)
769
770      options = option_builder.ProfileOptionBuilder.time_and_memory()
771      options['min_bytes'] = 0
772      options['min_micros'] = 0
773      options['select'] = ('bytes', 'peak_bytes', 'output_bytes',
774                           'residual_bytes')
775      options['output'] = 'none'
776      ret_pb = model_analyzer.profile(
777          sess.graph, run_meta=run_metadata, cmd='scope', options=options)
778      self.assertGreater(ret_pb.total_requested_bytes, 1000000)
779
780
# Invoke the test entry point only when this file is executed as a script.
781if __name__ == '__main__':
782  test.main()
783