# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for building profiler options."""
import copy

from tensorflow.python.profiler import tfprof_logger
from tensorflow.python.util.tf_export import tf_export


@tf_export(v1=['profiler.ProfileOptionBuilder'])
class ProfileOptionBuilder(object):
  # pylint: disable=line-too-long
  """Fluent builder for the option dict consumed by the Profiling API.

  The individual options are described in the tutorial at
  https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/options.md

  ```python
  # Use one of the pre-built option sets as-is:
  opts = (
      tf.compat.v1.profiler.ProfileOptionBuilder.trainable_variables_parameter())

  # Or assemble the options yourself:
  opts = (tf.compat.v1.profiler.ProfileOptionBuilder()
      .with_max_depth(10)
      .with_min_execution_time(min_micros=1000)
      .select(['accelerator_micros'])
      .with_stdout_output()
      .build())

  # Or start from a pre-built option set and adjust it:
  opts = (tf.compat.v1.profiler.ProfileOptionBuilder(
      tf.compat.v1.profiler.ProfileOptionBuilder.time_and_memory())
      .with_node_names(show_name_regexes=['.*rnn.*'])
      .build())

  # Finally, run the profiler with the options:
  _ = tf.compat.v1.profiler.profile(tf.compat.v1.get_default_graph(),
                                    run_meta=run_meta,
                                    cmd='scope',
                                    options=opts)
  ```
  """
  # pylint: enable=line-too-long

  def __init__(self, options=None):
    """Creates a builder, optionally seeded with an existing option dict.

    Args:
      options: Optional initial option dict to start with. It is deep-copied,
        so the builder never modifies the caller's object. When None, a
        default set of options is used.
    """
    if options is None:
      self._options = {
          'max_depth': 100,
          'min_bytes': 0,
          'min_micros': 0,
          'min_params': 0,
          'min_float_ops': 0,
          'min_occurrence': 0,
          'order_by': 'name',
          'account_type_regexes': ['.*'],
          'start_name_regexes': ['.*'],
          'trim_name_regexes': [],
          'show_name_regexes': ['.*'],
          'hide_name_regexes': [],
          'account_displayed_op_only': False,
          'select': ['micros'],
          'step': -1,
          'output': 'stdout',
      }
    else:
      self._options = copy.deepcopy(options)

  def _set_options(self, **updates):
    """Stores each keyword as an option, in order, and returns the builder.

    Args:
      **updates: Option names mapped to the values to store.

    Returns:
      self
    """
    for option_name, option_value in updates.items():
      self._options[option_name] = option_value
    return self

  @staticmethod
  def trainable_variables_parameter():
    """Pre-built options for profiling trainable variable parameters.

    These options are normally used together with the 'scope' view.

    Returns:
      A dict of profiling options.
    """
    return {
        'max_depth': 10000,
        'min_bytes': 0,
        'min_micros': 0,
        'min_params': 0,
        'min_float_ops': 0,
        'min_occurrence': 0,
        'order_by': 'name',
        'account_type_regexes': [tfprof_logger.TRAINABLE_VARIABLES],
        'start_name_regexes': ['.*'],
        'trim_name_regexes': [],
        'show_name_regexes': ['.*'],
        'hide_name_regexes': [],
        'account_displayed_op_only': True,
        'select': ['params'],
        'step': -1,
        'output': 'stdout',
    }

  @staticmethod
  def float_operation():
    # pylint: disable=line-too-long
    """Pre-built options for profiling float operations.

    The caveats of calculating float operations are described at
    https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/profile_model_architecture.md

    Returns:
      A dict of profiling options.
    """
    # pylint: enable=line-too-long
    return {
        'max_depth': 10000,
        'min_bytes': 0,
        'min_micros': 0,
        'min_params': 0,
        'min_float_ops': 1,
        'min_occurrence': 0,
        'order_by': 'float_ops',
        'account_type_regexes': ['.*'],
        'start_name_regexes': ['.*'],
        'trim_name_regexes': [],
        'show_name_regexes': ['.*'],
        'hide_name_regexes': [],
        'account_displayed_op_only': True,
        'select': ['float_ops'],
        'step': -1,
        'output': 'stdout',
    }

  @staticmethod
  def time_and_memory(min_micros=1, min_bytes=1, min_accelerator_micros=0,
                      min_cpu_micros=0, min_peak_bytes=0, min_residual_bytes=0,
                      min_output_bytes=0):
    """Pre-built options showing operation time and memory consumption.

    Args:
      min_micros: Only show profiler nodes whose execution time is no less
        than this. The time is the sum of accelerator and cpu times.
      min_bytes: Only show profiler nodes that requested to allocate no fewer
        bytes than this.
      min_accelerator_micros: Only show profiler nodes that spend no less
        than this time on an accelerator (e.g. GPU).
      min_cpu_micros: Only show profiler nodes that spend no less than this
        time on cpu.
      min_peak_bytes: Only show profiler nodes using no fewer than this many
        bytes at peak (high watermark). For a profiler node made of multiple
        graph nodes, the graph nodes' peak_bytes are summed.
      min_residual_bytes: Only show profiler nodes with no fewer than this
        many bytes still allocated after Compute() ends. For a profiler node
        made of multiple graph nodes, the graph nodes' residual_bytes are
        summed.
      min_output_bytes: Only show profiler nodes with no fewer than this many
        output bytes. The output is not necessarily allocated by the
        profiler node itself.

    Returns:
      A dict of profiling options.
    """
    return {
        'max_depth': 10000,
        'min_bytes': min_bytes,
        'min_peak_bytes': min_peak_bytes,
        'min_residual_bytes': min_residual_bytes,
        'min_output_bytes': min_output_bytes,
        'min_micros': min_micros,
        'min_accelerator_micros': min_accelerator_micros,
        'min_cpu_micros': min_cpu_micros,
        'min_params': 0,
        'min_float_ops': 0,
        'min_occurrence': 0,
        'order_by': 'micros',
        'account_type_regexes': ['.*'],
        'start_name_regexes': ['.*'],
        'trim_name_regexes': [],
        'show_name_regexes': ['.*'],
        'hide_name_regexes': [],
        'account_displayed_op_only': True,
        'select': ['micros', 'bytes'],
        'step': -1,
        'output': 'stdout',
    }

  def build(self):
    """Builds the profiling options.

    Returns:
      A dict of profiling options. It is a deep copy, so changing it does not
      affect this builder.
    """
    snapshot = copy.deepcopy(self._options)
    return snapshot

  def with_max_depth(self, max_depth):
    """Sets the maximum depth of display.

    What depth means depends on the profiling view: for the 'scope' view it
    is the depth of the name scope hierarchy (tree), for the 'op' view it is
    the number of operation types (list), etc.

    Args:
      max_depth: Maximum depth of the data structure to display.

    Returns:
      self
    """
    return self._set_options(max_depth=max_depth)

  def with_min_memory(self,
                      min_bytes=0,
                      min_peak_bytes=0,
                      min_residual_bytes=0,
                      min_output_bytes=0):
    """Only shows profiler nodes consuming no less than 'min_bytes'.

    All four thresholds are stored on every call, including the ones left at
    their default of 0.

    Args:
      min_bytes: Only show profiler nodes that requested to allocate no fewer
        bytes than this.
      min_peak_bytes: Only show profiler nodes using no fewer than this many
        bytes at peak (high watermark). For a profiler node made of multiple
        graph nodes, the graph nodes' peak_bytes are summed.
      min_residual_bytes: Only show profiler nodes with no fewer than this
        many bytes still allocated after Compute() ends. For a profiler node
        made of multiple graph nodes, the graph nodes' residual_bytes are
        summed.
      min_output_bytes: Only show profiler nodes with no fewer than this many
        output bytes. The output is not necessarily allocated by the
        profiler node itself.

    Returns:
      self
    """
    return self._set_options(
        min_bytes=min_bytes,
        min_peak_bytes=min_peak_bytes,
        min_residual_bytes=min_residual_bytes,
        min_output_bytes=min_output_bytes)

  def with_min_execution_time(self,
                              min_micros=0,
                              min_accelerator_micros=0,
                              min_cpu_micros=0):
    """Only shows profiler nodes consuming no less than 'min_micros'.

    All three thresholds are stored on every call, including the ones left at
    their default of 0.

    Args:
      min_micros: Only show profiler nodes whose execution time is no less
        than this. The time is the sum of accelerator and cpu times.
      min_accelerator_micros: Only show profiler nodes that spend no less
        than this time on an accelerator (e.g. GPU).
      min_cpu_micros: Only show profiler nodes that spend no less than this
        time on cpu.

    Returns:
      self
    """
    return self._set_options(
        min_micros=min_micros,
        min_accelerator_micros=min_accelerator_micros,
        min_cpu_micros=min_cpu_micros)

  def with_min_parameters(self, min_params):
    """Only shows profiler nodes holding no less than 'min_params' parameters.

    'Parameters' normally refers to the weights in TensorFlow variables.
    It reflects the 'capacity' of models.

    Args:
      min_params: Only show profiler nodes holding a number of parameters no
        less than this.

    Returns:
      self
    """
    return self._set_options(min_params=min_params)

  def with_min_occurrence(self, min_occurrence):
    """Only shows profiler nodes made of at least 'min_occurrence' graph nodes.

    A "node" means a profiler output node, which can be a python line
    (code view), an operation type (op view), or a graph node
    (graph/scope view). A python line includes all graph nodes created by that
    line, while an operation type includes all graph nodes of that type.

    Args:
      min_occurrence: Only show nodes including no less than this.

    Returns:
      self
    """
    return self._set_options(min_occurrence=min_occurrence)

  def with_min_float_operations(self, min_float_ops):
    # pylint: disable=line-too-long
    """Only shows profiler nodes consuming no less than 'min_float_ops'.

    The caveats of calculating float operations are described at
    https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/profile_model_architecture.md

    Args:
      min_float_ops: Only show profiler nodes with float operations no less
        than this.

    Returns:
      self
    """
    # pylint: enable=line-too-long
    return self._set_options(min_float_ops=min_float_ops)

  def with_accounted_types(self, account_type_regexes):
    """Selectively counts statistics based on node types.

    Here, 'types' means the profiler nodes' properties. By default the
    profiler considers the device name (e.g. /job:xx/.../device:GPU:0) and the
    operation type (e.g. MatMul) as a profiler node's properties. Users can
    also associate customized 'types' with profiler nodes through the
    OpLogProto proto.

    For example, profiler nodes placed on gpu:0 can be selected with:
      `account_type_regexes=['.*gpu:0.*']`

    If none of a node's properties match the specified regexes, the node is
    neither displayed nor accounted.

    Args:
      account_type_regexes: A list of regexes specifying the types. A shallow
        copy is stored.

    Returns:
      self
    """
    return self._set_options(
        account_type_regexes=copy.copy(account_type_regexes))

  def with_node_names(self,
                      start_name_regexes=None,
                      show_name_regexes=None,
                      hide_name_regexes=None,
                      trim_name_regexes=None):
    """Sets the regular expressions used to select profiler nodes to display.

    After 'with_accounted_types' is evaluated, 'with_node_names' is
    evaluated as follows:

      For a profile data structure, the profiler first finds the profiler
      nodes matching 'start_name_regexes' and starts displaying profiler
      nodes from there. Then, if a node matches 'show_name_regexes' and
      doesn't match 'hide_name_regexes', it is displayed. If a node matches
      'trim_name_regexes', the profiler stops searching that branch further.

    Arguments left as None keep their current value.

    Args:
      start_name_regexes: list of node name regexes to start displaying.
      show_name_regexes: list of node name regexes to display.
      hide_name_regexes: list of node name regexes that should be hidden.
      trim_name_regexes: list of node name regexes from where to stop.

    Returns:
      self
    """
    regex_options = (
        ('start_name_regexes', start_name_regexes),
        ('show_name_regexes', show_name_regexes),
        ('hide_name_regexes', hide_name_regexes),
        ('trim_name_regexes', trim_name_regexes),
    )
    for option_name, regexes in regex_options:
      if regexes is not None:
        # Store a shallow copy so the caller's list is not shared.
        self._options[option_name] = copy.copy(regexes)
    return self

  def account_displayed_op_only(self, is_true):
    """Sets whether to only account the statistics of displayed profiler nodes.

    Args:
      is_true: If true, only account statistics of nodes eventually
        displayed by the outputs.
        Otherwise, a node's statistics are accounted by its parents
        as long as its types match 'account_type_regexes', even if
        it is hidden from the output, say, by hide_name_regexes.

    Returns:
      self
    """
    return self._set_options(account_displayed_op_only=is_true)

  def with_empty_output(self):
    """Do not generate side-effect outputs.

    Returns:
      self
    """
    return self._set_options(output='none')

  def with_stdout_output(self):
    """Print the result to stdout.

    Returns:
      self
    """
    return self._set_options(output='stdout')

  def with_file_output(self, outfile):
    """Print the result to a file.

    Args:
      outfile: filename for the output.

    Returns:
      self
    """
    destination = 'file:outfile=%s' % outfile
    return self._set_options(output=destination)

  def with_timeline_output(self, timeline_file):
    """Generate a timeline json file.

    Args:
      timeline_file: filename for the output.

    Returns:
      self
    """
    destination = 'timeline:outfile=%s' % timeline_file
    return self._set_options(output=destination)

  def with_pprof_output(self, pprof_file):
    """Generate a pprof profile gzip file.

    To use the pprof file:
      pprof -png --nodecount=100 --sample_index=1 <pprof_file>

    Args:
      pprof_file: filename for output, usually suffixed with .pb.gz.

    Returns:
      self
    """
    destination = 'pprof:outfile=%s' % pprof_file
    return self._set_options(output=destination)

  def order_by(self, attribute):
    # pylint: disable=line-too-long
    """Orders the displayed profiler nodes based on an attribute.

    Supported attributes include micros, bytes, occurrence, params, etc.
    https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/options.md

    Args:
      attribute: An attribute the profiler node has.

    Returns:
      self
    """
    # pylint: enable=line-too-long
    return self._set_options(order_by=attribute)

  def select(self, attributes):
    # pylint: disable=line-too-long
    """Selects the attributes to display.

    The supported attributes are listed at
    https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/options.md

    Args:
      attributes: A list of attributes the profiler node has. A shallow copy
        is stored.

    Returns:
      self
    """
    # pylint: enable=line-too-long
    return self._set_options(select=copy.copy(attributes))

  def with_step(self, step):
    """Chooses which profile step to use for profiling.

    The 'step' here refers to the step defined by the `Profiler.add_step()`
    API.

    Args:
      step: When multiple steps of profiles are available, select which step's
        profile to use. If -1, use average of all available steps.

    Returns:
      self
    """
    return self._set_options(step=step)