xref: /aosp_15_r20/external/tensorflow/tensorflow/python/profiler/option_builder.py (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Utilities for building profiler options."""
16import copy
17
18from tensorflow.python.profiler import tfprof_logger
19from tensorflow.python.util.tf_export import tf_export
20
21
22@tf_export(v1=['profiler.ProfileOptionBuilder'])
class ProfileOptionBuilder(object):
  # pylint: disable=line-too-long
  """Option Builder for Profiling API.

  A builder wraps a plain dict of profiling options. Every setter updates one
  or more entries of that dict and returns the builder itself, so calls can be
  chained; `build()` hands out a deep copy of the dict.

  For tutorial on the options, see
  https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/options.md

  ```python
  # Users can use pre-built options:
  opts = (
      tf.compat.v1.profiler.ProfileOptionBuilder.trainable_variables_parameter())

  # Or, build your own options:
  opts = (tf.compat.v1.profiler.ProfileOptionBuilder()
      .with_max_depth(10)
      .with_min_execution_time(min_micros=1000)
      .select(['accelerator_micros'])
      .with_stdout_output()
      .build())

  # Or customize the pre-built options:
  opts = (tf.compat.v1.profiler.ProfileOptionBuilder(
      tf.compat.v1.profiler.ProfileOptionBuilder.time_and_memory())
      .with_node_names(show_name_regexes=['.*rnn.*'])
      .build())

  # Finally, profiling with the options:
  _ = tf.compat.v1.profiler.profile(tf.compat.v1.get_default_graph(),
                          run_meta=run_meta,
                          cmd='scope',
                          options=opts)
  ```
  """
  # pylint: enable=line-too-long

  def __init__(self, options=None):
    """Constructor.

    Args:
      options: Optional initial option dict to start with. It is deep-copied,
        so later changes to the caller's dict do not affect the builder. When
        omitted, the builder starts from the default options.
    """
    self._options = (
        self._default_options() if options is None else copy.deepcopy(options))

  @staticmethod
  def _default_options(**overrides):
    """Returns a new option dict: the builder defaults updated by `overrides`.

    The literal below is evaluated on every call, so neither the returned dict
    nor the lists inside it are shared between callers.

    Args:
      **overrides: Option names mapped to the values that replace (or are
        added to) the defaults.

    Returns:
      A dict of profiling options.
    """
    options = {
        'max_depth': 100,
        'min_bytes': 0,
        'min_micros': 0,
        'min_params': 0,
        'min_float_ops': 0,
        'min_occurrence': 0,
        'order_by': 'name',
        'account_type_regexes': ['.*'],
        'start_name_regexes': ['.*'],
        'trim_name_regexes': [],
        'show_name_regexes': ['.*'],
        'hide_name_regexes': [],
        'account_displayed_op_only': False,
        'select': ['micros'],
        'step': -1,
        'output': 'stdout',
    }
    options.update(overrides)
    return options

  @staticmethod
  def trainable_variables_parameter():
    """Options used to profile trainable variable parameters.

    Normally used together with 'scope' view.

    Returns:
      A dict of profiling options.
    """
    return ProfileOptionBuilder._default_options(
        max_depth=10000,
        account_type_regexes=[tfprof_logger.TRAINABLE_VARIABLES],
        account_displayed_op_only=True,
        select=['params'])

  @staticmethod
  def float_operation():
    # pylint: disable=line-too-long
    """Options used to profile float operations.

    Please see https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/profile_model_architecture.md
    on the caveats of calculating float operations.

    Returns:
      A dict of profiling options.
    """
    # pylint: enable=line-too-long
    return ProfileOptionBuilder._default_options(
        max_depth=10000,
        min_float_ops=1,
        order_by='float_ops',
        account_displayed_op_only=True,
        select=['float_ops'])

  @staticmethod
  def time_and_memory(min_micros=1, min_bytes=1, min_accelerator_micros=0,
                      min_cpu_micros=0, min_peak_bytes=0, min_residual_bytes=0,
                      min_output_bytes=0):
    """Show operation time and memory consumptions.

    Args:
      min_micros: Only show profiler nodes with execution time
          no less than this. It sums accelerator and cpu times.
      min_bytes: Only show profiler nodes requested to allocate no less bytes
          than this.
      min_accelerator_micros: Only show profiler nodes that spend no less than
          this time on accelerator (e.g. GPU).
      min_cpu_micros: Only show profiler nodes that spend no less than
          this time on cpu.
      min_peak_bytes: Only show profiler nodes using no less than this many
          bytes at peak (high watermark). For profiler nodes consisting of
          multiple graph nodes, it sums the graph nodes' peak_bytes.
      min_residual_bytes: Only show profiler nodes that have no less than
          this many bytes not being de-allocated after Compute() ends. For
          profiler nodes consisting of multiple graph nodes, it sums the
          graph nodes' residual_bytes.
      min_output_bytes: Only show profiler nodes that have no less than this
          many bytes of output. The output is not necessarily allocated by
          this profiler node.
    Returns:
      A dict of profiling options.
    """
    return ProfileOptionBuilder._default_options(
        max_depth=10000,
        min_bytes=min_bytes,
        min_peak_bytes=min_peak_bytes,
        min_residual_bytes=min_residual_bytes,
        min_output_bytes=min_output_bytes,
        min_micros=min_micros,
        min_accelerator_micros=min_accelerator_micros,
        min_cpu_micros=min_cpu_micros,
        order_by='micros',
        account_displayed_op_only=True,
        select=['micros', 'bytes'])

  def build(self):
    """Build a profiling option.

    Returns:
      A dict of profiling options. It is a deep copy, so the caller may modify
      it without affecting this builder.
    """
    return copy.deepcopy(self._options)

  def with_max_depth(self, max_depth):
    """Set the maximum depth of display.

    The depth depends on profiling view. For 'scope' view, it's the
    depth of name scope hierarchy (tree), for 'op' view, it's the number
    of operation types (list), etc.

    Args:
      max_depth: Maximum depth of the data structure to display.
    Returns:
      self
    """
    self._options['max_depth'] = max_depth
    return self

  def with_min_memory(self,
                      min_bytes=0,
                      min_peak_bytes=0,
                      min_residual_bytes=0,
                      min_output_bytes=0):
    """Only show profiler nodes consuming no less than 'min_bytes'.

    All four thresholds are written on every call: an argument that is left
    out resets the corresponding option to 0.

    Args:
      min_bytes: Only show profiler nodes requested to allocate no less bytes
          than this.
      min_peak_bytes: Only show profiler nodes using no less than this many
          bytes at peak (high watermark). For profiler nodes consisting of
          multiple graph nodes, it sums the graph nodes' peak_bytes.
      min_residual_bytes: Only show profiler nodes that have no less than
          this many bytes not being de-allocated after Compute() ends. For
          profiler nodes consisting of multiple graph nodes, it sums the
          graph nodes' residual_bytes.
      min_output_bytes: Only show profiler nodes that have no less than this
          many bytes of output. The output is not necessarily allocated by
          this profiler node.
    Returns:
      self
    """
    self._options.update({
        'min_bytes': min_bytes,
        'min_peak_bytes': min_peak_bytes,
        'min_residual_bytes': min_residual_bytes,
        'min_output_bytes': min_output_bytes,
    })
    return self

  def with_min_execution_time(self,
                              min_micros=0,
                              min_accelerator_micros=0,
                              min_cpu_micros=0):
    """Only show profiler nodes consuming no less than 'min_micros'.

    All three thresholds are written on every call: an argument that is left
    out resets the corresponding option to 0.

    Args:
      min_micros: Only show profiler nodes with execution time
          no less than this. It sums accelerator and cpu times.
      min_accelerator_micros: Only show profiler nodes that spend no less than
          this time on accelerator (e.g. GPU).
      min_cpu_micros: Only show profiler nodes that spend no less than
          this time on cpu.
    Returns:
      self
    """
    self._options.update({
        'min_micros': min_micros,
        'min_accelerator_micros': min_accelerator_micros,
        'min_cpu_micros': min_cpu_micros,
    })
    return self

  def with_min_parameters(self, min_params):
    """Only show profiler nodes holding no less than 'min_params' parameters.

    'Parameters' normally refers to the weights of TensorFlow variables.
    It reflects the 'capacity' of models.

    Args:
      min_params: Only show profiler nodes holding a number of parameters
          no less than this.
    Returns:
      self
    """
    self._options['min_params'] = min_params
    return self

  def with_min_occurrence(self, min_occurrence):
    # pylint: disable=line-too-long
    """Only show profiler nodes including no less than 'min_occurrence' graph nodes.

    A "node" means a profiler output node, which can be a python line
    (code view), an operation type (op view), or a graph node
    (graph/scope view). A python line includes all graph nodes created by that
    line, while an operation type includes all graph nodes of that type.

    Args:
      min_occurrence: Only show nodes including no less than this.
    Returns:
      self
    """
    # pylint: enable=line-too-long
    self._options['min_occurrence'] = min_occurrence
    return self

  def with_min_float_operations(self, min_float_ops):
    # pylint: disable=line-too-long
    """Only show profiler nodes consuming no less than 'min_float_ops'.

    Please see https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/profile_model_architecture.md
    on the caveats of calculating float operations.

    Args:
      min_float_ops: Only show profiler nodes with float operations
          no less than this.
    Returns:
      self
    """
    # pylint: enable=line-too-long
    self._options['min_float_ops'] = min_float_ops
    return self

  def with_accounted_types(self, account_type_regexes):
    """Selectively counting statistics based on node types.

    Here, 'types' means the profiler nodes' properties. Profiler by default
    considers device name (e.g. /job:xx/.../device:GPU:0) and operation type
    (e.g. MatMul) as profiler nodes' properties. User can also associate
    customized 'types' to profiler nodes through OpLogProto proto.

    For example, user can select profiler nodes placed on gpu:0 with:
    `account_type_regexes=['.*gpu:0.*']`

    If none of a node's properties match the specified regexes, the node is
    not displayed nor accounted.

    Args:
      account_type_regexes: A list of regexes specifying the types. A shallow
        copy is stored.
    Returns:
      self.
    """
    self._options['account_type_regexes'] = copy.copy(account_type_regexes)
    return self

  def with_node_names(self,
                      start_name_regexes=None,
                      show_name_regexes=None,
                      hide_name_regexes=None,
                      trim_name_regexes=None):
    """Regular expressions used to select profiler nodes to display.

    After 'with_accounted_types' is evaluated, 'with_node_names' are
    evaluated as follows:

      For a profile data structure, profiler first finds the profiler
      nodes matching 'start_name_regexes', and starts displaying profiler
      nodes from there. Then, if a node matches 'show_name_regexes' and
      doesn't match 'hide_name_regexes', it's displayed. If a node matches
      'trim_name_regexes', profiler stops further searching that branch.

    An argument left as None keeps the option's current value; every list that
    is given is stored as a shallow copy.

    Args:
      start_name_regexes: list of node name regexes to start displaying.
      show_name_regexes: list of node names regexes to display.
      hide_name_regexes: list of node_names regexes that should be hidden.
      trim_name_regexes: list of node name regexes from where to stop.
    Returns:
      self
    """
    requested = (
        ('start_name_regexes', start_name_regexes),
        ('show_name_regexes', show_name_regexes),
        ('hide_name_regexes', hide_name_regexes),
        ('trim_name_regexes', trim_name_regexes),
    )
    for option_name, regexes in requested:
      if regexes is not None:
        self._options[option_name] = copy.copy(regexes)
    return self

  def account_displayed_op_only(self, is_true):
    """Whether only account the statistics of displayed profiler nodes.

    Args:
      is_true: If true, only account statistics of nodes eventually
          displayed by the outputs.
          Otherwise, a node's statistics are accounted by its parents
          as long as its types match 'account_type_regexes', even if
          it is hidden from the output, say, by hide_name_regexes.
    Returns:
      self
    """
    self._options['account_displayed_op_only'] = is_true
    return self

  # The five output setters below all write the same 'output' option, so the
  # most recent call wins.

  def with_empty_output(self):
    """Do not generate side-effect outputs.

    Returns:
      self
    """
    self._options['output'] = 'none'
    return self

  def with_stdout_output(self):
    """Print the result to stdout.

    Returns:
      self
    """
    self._options['output'] = 'stdout'
    return self

  def with_file_output(self, outfile):
    """Print the result to a file.

    Args:
      outfile: filename for the output.
    Returns:
      self
    """
    self._options['output'] = 'file:outfile=%s' % outfile
    return self

  def with_timeline_output(self, timeline_file):
    """Generate a timeline json file.

    Args:
      timeline_file: filename for the output.
    Returns:
      self
    """
    self._options['output'] = 'timeline:outfile=%s' % timeline_file
    return self

  def with_pprof_output(self, pprof_file):
    """Generate a pprof profile gzip file.

    To use the pprof file:
      pprof -png --nodecount=100 --sample_index=1 <pprof_file>

    Args:
      pprof_file: filename for output, usually suffixed with .pb.gz.
    Returns:
      self.
    """
    self._options['output'] = 'pprof:outfile=%s' % pprof_file
    return self

  def order_by(self, attribute):
    # pylint: disable=line-too-long
    """Order the displayed profiler nodes based on an attribute.

    Supported attributes include micros, bytes, occurrence, params, etc.
    https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/options.md

    Args:
      attribute: An attribute the profiler node has.
    Returns:
      self
    """
    # pylint: enable=line-too-long
    self._options['order_by'] = attribute
    return self

  def select(self, attributes):
    # pylint: disable=line-too-long
    """Select the attributes to display.

    See https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/g3doc/options.md
    for supported attributes.

    Args:
      attributes: A list of attributes the profiler node has. A shallow copy
        is stored.
    Returns:
      self
    """
    # pylint: enable=line-too-long
    self._options['select'] = copy.copy(attributes)
    return self

  def with_step(self, step):
    """Which profile step to use for profiling.

    The 'step' here refers to the step defined by `Profiler.add_step()` API.

    Args:
      step: When multiple steps of profiles are available, select which step's
         profile to use. If -1, use average of all available steps.
    Returns:
      self
    """
    self._options['step'] = step
    return self
462