# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the tfprof model analyzer and profile context."""

import gzip
import io
import os
import random
import re

import numpy as np

from tensorflow.core.profiler import profile_pb2
from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.client import session
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gradients
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import gfile
from tensorflow.python.platform import test
from tensorflow.python.profiler import model_analyzer
from tensorflow.python.profiler import option_builder
from tensorflow.python.profiler import profile_context
from tensorflow.python.profiler.internal import model_analyzer_testlib as lib
from tensorflow.python.util import compat

builder = option_builder.ProfileOptionBuilder


class PrintModelAnalysisTest(test.TestCase):
  """Exercises `model_analyzer.profile`/`advise` and `ProfileContext`."""

  def _no_rewrite_session_config(self):
    """Returns a ConfigProto with `pin_to_host_optimization` turned off."""
    rewriter_config = rewriter_config_pb2.RewriterConfig(
        pin_to_host_optimization=rewriter_config_pb2.RewriterConfig.OFF)
    graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_config)
    return config_pb2.ConfigProto(graph_options=graph_options)

  def _run_with_full_trace(self, sess, fetches):
    """Runs `fetches` once with FULL_TRACE and returns the RunMetadata.

    Args:
      sess: The session to run in.
      fetches: Whatever should be passed to `sess.run`.

    Returns:
      The `config_pb2.RunMetadata` populated by the traced run.
    """
    run_meta = config_pb2.RunMetadata()
    sess.run(
        fetches,
        options=config_pb2.RunOptions(
            trace_level=config_pb2.RunOptions.FULL_TRACE),
        run_metadata=run_meta)
    return run_meta

  def testDumpToFile(self):
    """Profiles trainable parameters and compares the dumped text."""
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), 'dump')
    opts = builder(builder.trainable_variables_parameter()).with_file_output(
        outfile).build()

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      _ = lib.BuildSmallModel()
      model_analyzer.profile(sess.graph, options=opts)

      with gfile.Open(outfile, 'r') as f:
        # NOTE(review): the leading whitespace of the indented rows must match
        # the profiler's output exactly -- confirm against a real dump.
        self.assertEqual(
            u'node name | # parameters\n'
            '_TFProfRoot (--/451 params)\n'
            ' DW (3x3x3x6, 162/162 params)\n'
            ' DW2 (2x2x6x12, 288/288 params)\n'
            ' ScalarW (1, 1/1 params)\n', lib.CheckAndRemoveDoc(f.read()))

  @test_util.run_v1_only('b/120545219')
  def testSelectEverythingDetail(self):
    """Checks every selected column, then re-checks from the dumped profile."""
    ops.reset_default_graph()
    dev = '/device:GPU:0' if test.is_gpu_available() else '/device:CPU:0'
    outfile = os.path.join(test.get_temp_dir(), 'dump')
    opts = (
        builder(builder.trainable_variables_parameter()).with_file_output(
            outfile).with_accounted_types(['.*']).select([
                'micros', 'bytes', 'params', 'float_ops', 'occurrence',
                'device', 'op_types', 'input_shapes'
            ]).build())

    with profile_context.ProfileContext(
        test.get_temp_dir(), trace_steps=[], dump_steps=[]) as pctx:
      with session.Session(
          config=self._no_rewrite_session_config()) as sess, ops.device(dev):
        x = lib.BuildSmallModel()

        self.evaluate(variables.global_variables_initializer())
        pctx.trace_next_step()
        pctx.dump_next_step()
        _ = self.evaluate(x)

        pctx.profiler.profile_name_scope(options=opts)

        with gfile.Open(outfile, 'r') as f:
          # pylint: disable=line-too-long
          dump_str = lib.CheckAndRemoveDoc(f.read())
          outputs = dump_str.split('\n')

          self.assertEqual(
              outputs[0],
              'node name | # parameters | # float_ops | requested bytes | total execution time | accelerator execution time | cpu execution time | assigned devices | op types | op count (run|defined) | input shapes'
          )
          for o in outputs[1:]:
            if 'Conv2D ' in o:
              metrics = o[o.find('(') + 1:o.find(')')].split(',')
              # Make sure time is profiled.
              gap = 1 if test.is_gpu_available() else 2
              for i in range(3, 6, gap):
                mat = re.search('(.*)(?:us|ms|sec)/(.*)(?:us|ms|sec)',
                                metrics[i])
                # Fail with the offending text rather than an AttributeError.
                self.assertIsNotNone(mat, metrics[i])
                self.assertGreater(float(mat.group(1)), 0.0)
                self.assertGreater(float(mat.group(2)), 0.0)
              # Make sure device is profiled.
              if test.is_gpu_available():
                self.assertIn('gpu', metrics[6])
                self.assertNotIn('cpu', metrics[6])
              else:
                self.assertNotIn('gpu', metrics[6])
                self.assertIn('cpu', metrics[6])
              # Make sure float_ops is profiled.
              mat = re.search('(.*)k/(.*)k flops', metrics[1].strip())
              self.assertIsNotNone(mat, metrics[1])
              self.assertGreater(float(mat.group(1)), 0.0)
              self.assertGreater(float(mat.group(2)), 0.0)
              # Make sure op_count is profiled.
              self.assertEqual(metrics[8].strip(), '1/1|1/1')
              # Make sure input_shapes is profiled.
              self.assertEqual(metrics[9].strip(), '0:2x6x6x3|1:3x3x3x6')

            if 'DW (3x3x3x6' in o:
              metrics = o[o.find('(') + 1:o.find(')')].split(',')
              mat = re.search('(.*)/(.*) params', metrics[1].strip())
              self.assertIsNotNone(mat, metrics[1])
              self.assertGreater(float(mat.group(1)), 0.0)
              self.assertGreater(float(mat.group(2)), 0.0)
          # pylint: enable=line-too-long

    # Test that profiler restored from profile file gives the same result.
    gfile.Remove(outfile)
    profile_file = os.path.join(test.get_temp_dir(), 'profile_1')
    with lib.ProfilerFromFile(profile_file) as profiler:
      profiler.profile_name_scope(options=opts)
      with gfile.Open(outfile, 'r') as f:
        self.assertEqual(dump_str, lib.CheckAndRemoveDoc(f.read()))

  def testSelectEverything(self):
    """Profiles with many columns selected; only checks that it completes."""
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), 'dump')
    opts = (
        builder(builder.trainable_variables_parameter()).with_file_output(
            outfile).with_accounted_types(['.*']).select([
                'params', 'float_ops', 'occurrence', 'device', 'op_types',
                'input_shapes'
            ]).build())

    with session.Session(config=self._no_rewrite_session_config()
                        ) as sess, ops.device('/device:CPU:0'):
      x = lib.BuildSmallModel()

      self.evaluate(variables.global_variables_initializer())
      run_meta = self._run_with_full_trace(sess, x)

      model_analyzer.profile(sess.graph, run_meta, options=opts)

  def testSimpleCodeView(self):
    """Checks the header row of the 'code' view for the small model."""
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), 'dump')
    # TODO(xpan): Test 'micros'. Since the execution time changes each run,
    # it's a bit difficult to test it now.
    opts = (
        builder(builder.trainable_variables_parameter()).with_file_output(
            outfile).with_accounted_types(['.*']).with_node_names(
                show_name_regexes=['.*model_analyzer_testlib.*'
                                  ]).account_displayed_op_only(False).select([
                                      'bytes', 'params', 'float_ops',
                                      'num_hidden_ops', 'device', 'input_shapes'
                                  ]).build())

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      x = lib.BuildSmallModel()

      self.evaluate(variables.global_variables_initializer())
      run_meta = self._run_with_full_trace(sess, x)

      model_analyzer.profile(sess.graph, run_meta, cmd='code', options=opts)

      with gfile.Open(outfile, 'r') as f:
        # pylint: disable=line-too-long
        self.assertEqual(
            'node name | requested bytes | # parameters | # float_ops | assigned devices | in',
            lib.CheckAndRemoveDoc(f.read())[0:80])
        # pylint: enable=line-too-long

  @test_util.run_v1_only('b/120545219')
  def testComplexCodeView(self):
    """Checks the python-code view of the full model via ProfileContext."""
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), 'dump')
    opts = (
        builder(builder.trainable_variables_parameter()).with_file_output(
            outfile).with_accounted_types(['.*']).with_node_names(
                show_name_regexes=['.*model_analyzer_testlib.py.*'
                                  ]).account_displayed_op_only(False).select(
                                      ['params', 'float_ops']).build())

    with profile_context.ProfileContext(
        test.get_temp_dir(), trace_steps=[], dump_steps=[]) as pctx:
      with session.Session(config=self._no_rewrite_session_config()) as sess:
        x = lib.BuildFullModel()

        self.evaluate(variables.global_variables_initializer())
        pctx.trace_next_step()
        _ = self.evaluate(x)
        tfprof_node = pctx.profiler.profile_python(options=opts)

        # pylint: disable=line-too-long
        with gfile.Open(outfile, 'r') as f:
          lines = f.read().split('\n')
          self.assertGreater(len(lines), 5)
          # Slicing already clamps to the line length.
          result = '\n'.join(l[:80] for l in lines)
          self.assertTrue(
              compat.as_text(lib.CheckAndRemoveDoc(result)).startswith(
                  'node name | # parameters | # float_ops'))

        self.assertLess(0, tfprof_node.total_exec_micros)
        self.assertEqual(2844, tfprof_node.total_parameters)
        # The graph is modified when MKL is enabled, total_float_ops will
        # be different.
        if test_util.IsMklEnabled():
          self.assertLess(101600, tfprof_node.total_float_ops)
        else:
          self.assertLess(145660, tfprof_node.total_float_ops)
        self.assertEqual(8, len(tfprof_node.children))
        self.assertEqual('_TFProfRoot', tfprof_node.name)
        self.assertEqual('model_analyzer_testlib.py:63:BuildFullModel',
                         tfprof_node.children[0].name)
        self.assertEqual(
            'model_analyzer_testlib.py:63:BuildFullModel (gradient)',
            tfprof_node.children[1].name)
        self.assertEqual('model_analyzer_testlib.py:67:BuildFullModel',
                         tfprof_node.children[2].name)
        self.assertEqual(
            'model_analyzer_testlib.py:67:BuildFullModel (gradient)',
            tfprof_node.children[3].name)
        self.assertEqual('model_analyzer_testlib.py:69:BuildFullModel',
                         tfprof_node.children[4].name)
        self.assertEqual('model_analyzer_testlib.py:70:BuildFullModel',
                         tfprof_node.children[5].name)
        self.assertEqual(
            'model_analyzer_testlib.py:70:BuildFullModel (gradient)',
            tfprof_node.children[6].name)
        self.assertEqual('model_analyzer_testlib.py:72:BuildFullModel',
                         tfprof_node.children[7].name)
        # pylint: enable=line-too-long

  def testCodeViewLeafGraphNode(self):
    """Walks first children of the code view down to the leaf node."""
    ops.reset_default_graph()
    opts = (
        builder(builder.trainable_variables_parameter()).with_empty_output()
        .with_accounted_types(['.*']).account_displayed_op_only(False).select(
            ['bytes', 'params', 'float_ops', 'device']).build())

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      x = lib.BuildSmallModel()

      self.evaluate(variables.global_variables_initializer())
      run_meta = self._run_with_full_trace(sess, x)

      tfprof_node = model_analyzer.profile(
          sess.graph, run_meta, cmd='code', options=opts)

      # Interior nodes are expected to hold no graph nodes; the leaf holds one.
      leaf = tfprof_node
      while leaf.children:
        self.assertEqual(0, len(leaf.graph_nodes))
        leaf = leaf.children[0]
      self.assertEqual(1, len(leaf.graph_nodes))

  def testTimeline(self):
    """Checks that a non-trivial timeline file is written for step 0."""
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), 'timeline')
    opts = (
        builder(builder.trainable_variables_parameter()).with_max_depth(100000)
        .with_step(0).with_timeline_output(outfile).with_accounted_types(
            ['.*']).build())

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      x = lib.BuildFullModel()

      self.evaluate(variables.global_variables_initializer())
      run_meta = self._run_with_full_trace(sess, x)

      _ = model_analyzer.profile(
          sess.graph, run_meta, cmd='graph', options=opts)

      with gfile.Open(outfile + '_0', 'r') as f:
        # Test that a json file is created.
        # TODO(xpan): tfprof Timeline isn't quite correct on Windows.
        # Investigate why.
        if os.name != 'nt':
          self.assertLess(1000, len(f.read()))
        else:
          self.assertLess(1, len(f.read()))

  def testOpView(self):
    """Checks the 'op' view header and the ordering of its node chain."""
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), 'dump')

    opts = (
        builder(builder.trainable_variables_parameter()).with_file_output(
            outfile).with_accounted_types(
                ['.*']).with_min_occurrence(10).order_by('occurrence').select([
                    'params', 'micros', 'bytes', 'peak_bytes', 'residual_bytes',
                    'output_bytes', 'occurrence', 'input_shapes'
                ]).build())

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      x = lib.BuildFullModel()

      self.evaluate(variables.global_variables_initializer())
      run_meta = self._run_with_full_trace(sess, x)

      tfprof_node = model_analyzer.profile(
          sess.graph, run_meta, cmd='op', options=opts)

      with gfile.Open(outfile, 'r') as f:
        # pylint: disable=line-too-long
        self.assertEqual(
            'nodename|requestedbytes|peakbytes|residualbytes|outputbytes|totalexecutiontime|acceleratorexecutiontime|cpuexecutiontime|#parameters|opoccurrence(run|defined)|inputshapes',
            lib.CheckAndRemoveDoc(f.read()).replace('\t',
                                                    '').replace(' ', '')[0:170])
        # pylint: enable=line-too-long

      last_occurrence = 1e32
      input_shapes = 0
      last_total_micros = tfprof_node.total_exec_micros
      last_micros = tfprof_node.exec_micros
      while tfprof_node.children:
        for gnode in tfprof_node.graph_nodes:
          input_shapes += len(gnode.input_shapes)
        self.assertEqual(len(tfprof_node.children), 1)
        tfprof_node = tfprof_node.children[0]

        # A parent's total time is its own time plus its child's total time.
        self.assertEqual(last_total_micros,
                         tfprof_node.total_exec_micros + last_micros)
        last_total_micros = tfprof_node.total_exec_micros
        last_micros = tfprof_node.exec_micros

        # Ordered by occurrence, so the count must never increase.
        self.assertLessEqual(len(tfprof_node.graph_nodes), last_occurrence)
        last_occurrence = len(tfprof_node.graph_nodes)

      self.assertGreater(input_shapes, 0)

  def testAdvisor(self):
    """Checks that `advise` returns the expected checkers and reports."""
    ops.reset_default_graph()

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      x = lib.BuildFullModel()

      self.evaluate(variables.global_variables_initializer())
      run_meta = self._run_with_full_trace(sess, x)

      advice_pb = model_analyzer.advise(sess.graph, run_meta)
      self.assertIn('AcceleratorUtilizationChecker', advice_pb.checkers)
      self.assertIn('ExpensiveOperationChecker', advice_pb.checkers)
      self.assertIn('OperationChecker', advice_pb.checkers)

      checker = advice_pb.checkers['AcceleratorUtilizationChecker']
      if test.is_gpu_available():
        self.assertGreater(len(checker.reports), 0)
      else:
        self.assertEqual(len(checker.reports), 0)
      checker = advice_pb.checkers['ExpensiveOperationChecker']
      self.assertGreater(len(checker.reports), 0)

  def pprof_test_helper(self, attribute, should_fail=False):
    """Profiles the full model to pprof output for a single attribute.

    Args:
      attribute: The attribute name passed to `select`; also used as the
        prefix of the output file name.
      should_fail: If True, asserts that no output file was produced and
        returns without parsing anything.
    """
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), attribute + '_pprof.pb.gz')
    opts = (
        builder(builder.time_and_memory()).select([
            attribute
        ]).with_max_depth(100000).with_node_names(
            trim_name_regexes=['ops.py.*']).with_pprof_output(outfile).build())

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      x = lib.BuildFullModel()

      self.evaluate(variables.global_variables_initializer())
      run_meta = self._run_with_full_trace(sess, x)

      _ = model_analyzer.profile(sess.graph, run_meta, cmd='code', options=opts)

      if should_fail:
        self.assertFalse(gfile.Exists(outfile))
        return

      profile_pb = profile_pb2.Profile()
      with gfile.Open(outfile, 'rb') as f:
        with gzip.GzipFile(fileobj=io.BytesIO(f.read())) as gzipf:
          profile_pb.ParseFromString(gzipf.read())

      self.assertGreater(len(profile_pb.sample), 10)
      self.assertGreater(len(profile_pb.location), 10)
      self.assertGreater(len(profile_pb.function), 10)
      self.assertGreater(len(profile_pb.string_table), 30)

      has_rnn = False
      for s in profile_pb.string_table:
        if 'rnn' in s:
          has_rnn = True
        # Names matching the trim regex must not appear in the output.
        self.assertFalse(s.startswith('ops.py'))
      self.assertTrue(has_rnn)

  def testPprof(self):
    """Runs the pprof helper for supported and unsupported attributes."""
    for attr in [
        'micros', 'bytes', 'accelerator_micros', 'cpu_micros', 'params',
        'float_ops'
    ]:
      self.pprof_test_helper(attr)
    for attr in ['op_types', 'device', 'input_shapes']:
      self.pprof_test_helper(attr, True)

  def testMinOption(self):
    """Checks that each `min_*` option filters out nodes below the bound."""
    ops.reset_default_graph()

    def check_min(nodes, attr, threshold):
      """Asserts `attr` >= `threshold` for every node under `nodes`."""
      for n in nodes:
        if threshold > 0:
          self.assertGreaterEqual(getattr(n, attr), threshold)
        check_min(n.children, attr, threshold)

    # (time_and_memory keyword, node field that the keyword bounds).
    min_options = (
        ('min_micros', 'exec_micros'),
        ('min_accelerator_micros', 'accelerator_exec_micros'),
        ('min_cpu_micros', 'cpu_exec_micros'),
        ('min_bytes', 'requested_bytes'),
        ('min_peak_bytes', 'peak_bytes'),
        ('min_residual_bytes', 'residual_bytes'),
        ('min_output_bytes', 'output_bytes'),
    )

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      x = lib.BuildSmallModel()
      self.evaluate(variables.global_variables_initializer())
      run_meta = self._run_with_full_trace(sess, x)

      min_val = random.randint(0, 10000)

      for option_name, node_attr in min_options:
        opts = builder(builder.time_and_memory(
            **{option_name: min_val})).with_empty_output().build()
        tfprof_node = model_analyzer.profile(
            sess.graph, run_meta=run_meta, options=opts)
        check_min(tfprof_node.children, node_attr, min_val)

  def testSelectOption(self):
    """Checks that `select` controls which columns appear in the output."""
    ops.reset_default_graph()
    outfile = os.path.join(test.get_temp_dir(), 'dump')

    def check_selection(selected, not_selected):
      """Asserts the dump contains `selected` and none of `not_selected`."""
      with gfile.Open(outfile, 'r') as f:
        s = f.read()
      for attr in selected:
        self.assertIn(attr, s)
      for attr in not_selected:
        self.assertNotIn(attr, s)

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      x = lib.BuildSmallModel()
      self.evaluate(variables.global_variables_initializer())
      run_meta = self._run_with_full_trace(sess, x)

      opts = builder(
          builder.time_and_memory()).with_file_output(outfile).select(
              ['micros']).build()
      _ = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts)
      check_selection(['total execution time', 'accelerator execution time'],
                      ['bytes'])

      opts = builder(
          builder.time_and_memory()).with_file_output(outfile).select(
              ['bytes']).build()
      _ = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts)
      check_selection(['requested bytes'],
                      ['peak bytes', 'residual bytes', 'output bytes'])

      opts = builder(
          builder.time_and_memory()).with_file_output(outfile).select(
              ['peak_bytes', 'residual_bytes', 'output_bytes']).build()
      _ = model_analyzer.profile(sess.graph, run_meta=run_meta, options=opts)
      # NOTE(review): 'requested_bytes' (underscore) differs from the
      # 'requested bytes' header checked above, so this exclusion may be
      # vacuous -- confirm the intended spelling.
      check_selection(['peak bytes', 'residual bytes', 'output bytes'],
                      ['requested_bytes'])

  def _trainLoop(self, train_op, train_steps, time_dir, time_step, memory_dir,
                 memory_step, profile_dir, dump_step):
    """Runs `train_op` and checks which profile files each step produced.

    Every file found is removed after it is checked, so the next step starts
    from empty directories.

    Args:
      train_op: The op evaluated once per step.
      train_steps: Number of steps to run; steps are numbered from 1.
      time_dir: Directory expected to receive time profiles.
      time_step: Steps at which exactly one time profile is expected.
      memory_dir: Directory expected to receive memory profiles.
      memory_step: Steps at which exactly one memory profile is expected.
      profile_dir: Directory expected to receive `profile_<step>` dumps.
      dump_step: Steps at which a profile dump is expected.
    """

    def check_and_clear(directory, expected_text):
      """Asserts one file containing `expected_text`, then empties the dir."""
      names = gfile.ListDirectory(directory)
      self.assertEqual(len(names), 1)
      # Read inside a context manager so the handle is closed.
      with gfile.Open(os.path.join(directory, names[0]), 'r') as f:
        self.assertIn(expected_text, f.read())
      for name in names:
        gfile.Remove(os.path.join(directory, name))

    with session.Session(config=self._no_rewrite_session_config()):
      self.evaluate(variables.global_variables_initializer())
      # start from 1 because variable_initializer took one step.
      for i in range(1, train_steps + 1):
        _ = self.evaluate(train_op)
        if i in time_step:
          check_and_clear(time_dir, 'execution time')
        else:
          self.assertEqual(len(gfile.ListDirectory(time_dir)), 0)
        if i in memory_step:
          check_and_clear(memory_dir, 'requested bytes')
        else:
          self.assertEqual(len(gfile.ListDirectory(memory_dir)), 0)
        if i in dump_step:
          ret = gfile.ListDirectory(profile_dir)
          self.assertAllEqual(ret, ['profile_%d' % i])
          for name in ret:
            gfile.Remove(os.path.join(profile_dir, name))
        elif i < dump_step[0]:
          # Nothing has been dumped yet, so the directory must not exist.
          self.assertFalse(gfile.Exists(profile_dir))
        else:
          self.assertEqual(len(gfile.ListDirectory(profile_dir)), 0)

  @test_util.run_v1_only('b/120545219')
  def testAutoProfiling(self):
    """Checks that auto profiling writes files only at the requested steps."""
    ops.reset_default_graph()
    time_dir = os.path.join(test.get_temp_dir(), 'time')
    memory_dir = os.path.join(test.get_temp_dir(), 'memory')
    profile_dir = os.path.join(test.get_temp_dir(), 'dir/dir2/profile')
    # TODO(xpan): Should we create parent directory for them?
    gfile.MkDir(time_dir)
    gfile.MkDir(memory_dir)

    time_opts = (
        builder(builder.time_and_memory()).with_file_output(
            os.path.join(time_dir, 'profile')).select(['micros']).build())
    memory_opts = (
        builder(builder.time_and_memory()).with_file_output(
            os.path.join(memory_dir, 'profile')).select(['bytes']).build())

    time_steps = [2, 3]
    memory_steps = [1, 3]
    dump_steps = [3, 4]

    x = lib.BuildSmallModel()
    with profile_context.ProfileContext(
        profile_dir, trace_steps=[1, 2, 3], dump_steps=dump_steps) as pctx:
      pctx.add_auto_profiling('scope', time_opts, time_steps)
      pctx.add_auto_profiling('scope', memory_opts, memory_steps)

      self._trainLoop(x, 10, time_dir, time_steps, memory_dir, memory_steps,
                      profile_dir, dump_steps)

  @test_util.run_v1_only('b/120545219')
  def testOOM(self):
    """Checks the allocation report attached to a GPU OOM error."""
    if not test.is_gpu_available():
      return
    ops.reset_default_graph()
    with ops.device('/device:GPU:0'):
      a = random_ops.random_normal([1, 10000, 20000], name='test_random1')
      b = random_ops.random_normal([30000, 10000, 1], name='test_random2')
      c = a * b

    # The assertions below only execute if `sess.run` raises.
    try:
      with session.Session(config=self._no_rewrite_session_config()) as sess:
        sess.run(
            c,
            options=config_pb2.RunOptions(
                report_tensor_allocations_upon_oom=True))
    except Exception as e:  # pylint: disable=broad-except
      exception_str = '%s' % e
      # This trace reports allocations for the two random tensors.
      self.assertIn('OOM when allocating tensor with shape[30000,10000,20000]',
                    exception_str)
      mat = re.search('(.*)GiB from test_random2/RandomStandardNormal',
                      exception_str)
      self.assertIsNotNone(mat, exception_str)
      self.assertGreater(float(mat.group(1)), 0.0)
      mat = re.search('(.*)MiB from test_random1/RandomStandardNormal',
                      exception_str)
      self.assertIsNotNone(mat, exception_str)
      self.assertGreater(float(mat.group(1)), 0.0)

  @test_util.run_v1_only('b/120545219')
  def testDistributedOOM(self):
    """Checks the OOM allocation report when ops span two workers."""
    if not test.is_gpu_available():
      return
    ops.reset_default_graph()

    workers, _ = test_util.create_local_cluster(2, 0)

    with ops.device('/job:worker/replica:0/task:0/gpu:0'):
      a = random_ops.random_normal([1, 10000, 20000], name='test_random1')
    with ops.device('/job:worker/replica:0/task:1/gpu:0'):
      b = random_ops.random_normal([30000, 10000, 1], name='test_random2')
      c = a * b

    # The assertions below only execute if `sess.run` raises.
    try:
      with session.Session(workers[1].target) as sess:
        sess.run(
            c,
            options=config_pb2.RunOptions(
                report_tensor_allocations_upon_oom=True))
    except Exception as e:  # pylint: disable=broad-except
      exception_str = '%s' % e
      # test_random2 is reported because it's allocated in worker 1.
      self.assertIn(
          'Current usage from device: '
          '/job:worker/replica:0/task:1/device:GPU:0, '
          'allocator: GPU_0_bfc', exception_str)
      mat = re.search('(.*)GiB from test_random2/RandomStandardNormal',
                      exception_str)
      self.assertIsNotNone(mat, exception_str)
      self.assertGreater(float(mat.group(1)), 0.0)
      # test_random1 is not reported because it's allocated in worker 0.
      mat = re.search('(.*)MiB from test_random1/RandomStandardNormal',
                      exception_str)
      self.assertIsNone(mat)

  @test_util.run_v1_only('b/120545219')
  def testTrackPersistentBytes(self):
    """Checks that byte counts for 'mul' are positive and stable across runs."""
    ops.reset_default_graph()
    a = array_ops.constant(np.ones((100, 100)))
    b = array_ops.constant(np.ones((100, 100)))
    c = a * b
    config = config_pb2.ConfigProto()
    config.graph_options.rewrite_options.min_graph_nodes = -1

    with session.Session(config=config) as sess:
      run_metadata = self._run_with_full_trace(sess, c)

      options = builder.time_and_memory()
      options['min_bytes'] = 0
      options['select'] = ('bytes', 'peak_bytes', 'output_bytes',
                           'residual_bytes')
      ret = model_analyzer.profile(
          sess.graph, run_meta=run_metadata, cmd='scope', options=options)

      # Profile a second, independent run of the same op.
      run_metadata = self._run_with_full_trace(sess, c)
      ret2 = model_analyzer.profile(
          sess.graph, run_meta=run_metadata, cmd='scope', options=options)

      n = lib.SearchTFProfNode(ret, 'mul')
      n2 = lib.SearchTFProfNode(ret2, 'mul')
      self.assertGreater(n.peak_bytes, 0)
      self.assertGreater(n.output_bytes, 0)
      self.assertGreater(n.residual_bytes, 0)
      self.assertEqual(n.peak_bytes, n2.peak_bytes)
      self.assertEqual(n.output_bytes, n2.output_bytes)
      self.assertEqual(n.residual_bytes, n2.residual_bytes)

  @test_util.run_v1_only('b/120545219')
  def testTraceLoopBytes(self):
    """Checks requested bytes when differentiating through a while loop."""
    if not test.is_gpu_available():
      return
    ops.reset_default_graph()
    steps = 100

    with ops.device('/gpu:0'):
      x = array_ops.ones((100, 100), dtype=dtypes.float32)
      n = array_ops.constant(steps, dtype=dtypes.int32)
      x1 = array_ops.ones((100, 100))

      x *= x1

      def loop_body(i, x):
        x *= x
        return i + 1, x

      _, y = control_flow_ops.while_loop(lambda i, x: i < n, loop_body,
                                         [array_ops.constant(0), x])

    grad = gradients.gradients(y, [x1])

    with session.Session(config=self._no_rewrite_session_config()) as sess:
      run_metadata = self._run_with_full_trace(sess, grad)

      options = builder.time_and_memory()
      options['min_bytes'] = 0
      options['min_micros'] = 0
      options['select'] = ('bytes', 'peak_bytes', 'output_bytes',
                           'residual_bytes')
      options['output'] = 'none'
      ret_pb = model_analyzer.profile(
          sess.graph, run_meta=run_metadata, cmd='scope', options=options)
      self.assertGreater(ret_pb.total_requested_bytes, 1000000)


if __name__ == '__main__':
  test.main()