xref: /aosp_15_r20/external/tensorflow/tensorflow/python/eager/monitoring.py (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""TensorFlow monitoring APIs."""
16
17import collections
18import functools
19import time
20
21from tensorflow.core.framework import summary_pb2
22from tensorflow.python import pywrap_tfe
23from tensorflow.python.client import pywrap_tf_session
24from tensorflow.python.framework import c_api_util
25from tensorflow.python.util import compat
26from tensorflow.python.util.tf_export import tf_export
27
# Groups the three pywrap_tfe entry points used for one metric variant: the
# function that creates the metric, the one that deletes it, and the one that
# fetches a cell from it.
_MetricMethod = collections.namedtuple('MetricMethod', 'create delete get_cell')
# Each table below is indexed by label count: `Metric` picks the entry at
# position `label_length`, so entry N holds the bindings whose names end in N.
# The length of a table therefore bounds how many labels that metric type
# accepts.

# Counter bindings, for 0 to 2 labels.
_counter_methods = [
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewCounter0,
        delete=pywrap_tfe.TFE_MonitoringDeleteCounter0,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellCounter0),
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewCounter1,
        delete=pywrap_tfe.TFE_MonitoringDeleteCounter1,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellCounter1),
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewCounter2,
        delete=pywrap_tfe.TFE_MonitoringDeleteCounter2,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellCounter2),
]
# IntGauge bindings, for 0 to 2 labels.
_int_gauge_methods = [
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewIntGauge0,
        delete=pywrap_tfe.TFE_MonitoringDeleteIntGauge0,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellIntGauge0),
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewIntGauge1,
        delete=pywrap_tfe.TFE_MonitoringDeleteIntGauge1,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellIntGauge1),
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewIntGauge2,
        delete=pywrap_tfe.TFE_MonitoringDeleteIntGauge2,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellIntGauge2),
]
# StringGauge bindings, for 0 to 4 labels (the only table with five entries).
_string_gauge_methods = [
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewStringGauge0,
        delete=pywrap_tfe.TFE_MonitoringDeleteStringGauge0,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellStringGauge0),
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewStringGauge1,
        delete=pywrap_tfe.TFE_MonitoringDeleteStringGauge1,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellStringGauge1),
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewStringGauge2,
        delete=pywrap_tfe.TFE_MonitoringDeleteStringGauge2,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellStringGauge2),
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewStringGauge3,
        delete=pywrap_tfe.TFE_MonitoringDeleteStringGauge3,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellStringGauge3),
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewStringGauge4,
        delete=pywrap_tfe.TFE_MonitoringDeleteStringGauge4,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellStringGauge4),
]
# BoolGauge bindings, for 0 to 2 labels.
_bool_gauge_methods = [
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewBoolGauge0,
        delete=pywrap_tfe.TFE_MonitoringDeleteBoolGauge0,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellBoolGauge0),
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewBoolGauge1,
        delete=pywrap_tfe.TFE_MonitoringDeleteBoolGauge1,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellBoolGauge1),
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewBoolGauge2,
        delete=pywrap_tfe.TFE_MonitoringDeleteBoolGauge2,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellBoolGauge2),
]
# Sampler bindings, for 0 to 2 labels.
_sampler_methods = [
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewSampler0,
        delete=pywrap_tfe.TFE_MonitoringDeleteSampler0,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellSampler0),
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewSampler1,
        delete=pywrap_tfe.TFE_MonitoringDeleteSampler1,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellSampler1),
    _MetricMethod(
        create=pywrap_tfe.TFE_MonitoringNewSampler2,
        delete=pywrap_tfe.TFE_MonitoringDeleteSampler2,
        get_cell=pywrap_tfe.TFE_MonitoringGetCellSampler2),
]
107
108
class Metric(object):
  """The base class of metric.

  A metric is created, deleted and queried through the `create`, `delete` and
  `get_cell` callables of one entry of `metric_methods`; the entry is selected
  by the number of labels the metric was declared with.
  """

  __slots__ = ["_metric", "_metric_name", "_metric_methods", "_label_length"]

  def __init__(self, metric_name, metric_methods, label_length, *args):
    """Creates a new metric.

    Args:
      metric_name: name of the metric class, used in error messages.
      metric_methods: list of swig metric methods, indexed by label count.
      label_length: length of label args.
      *args: the arguments to call create method.

    Raises:
      ValueError: if `label_length` is not smaller than the number of entries
        in `metric_methods`.
    """
    # Validate before assigning any slot, so that a rejected instance carries
    # no state for `__del__` to act on.
    if label_length >= len(metric_methods):
      raise ValueError('Cannot create {} metric with label >= {}'.format(
          metric_name, len(metric_methods)))

    self._metric_name = metric_name
    self._metric_methods = metric_methods
    self._label_length = label_length
    self._metric = metric_methods[label_length].create(*args)

  def __del__(self):
    """Deletes the underlying metric if construction completed."""
    try:
      deleter = self._metric_methods[self._label_length].delete
      metric = self._metric
    except (AttributeError, IndexError):
      # AttributeError: a slot was never assigned because `__init__` raised
      # (or never ran). IndexError: the label count has no matching entry.
      # Either way nothing was created, so there is nothing to delete.
      return

    if deleter is not None:
      deleter(metric)

  def get_cell(self, *labels):
    """Retrieves the cell.

    Args:
      *labels: one value per label the metric was created with.

    Returns:
      Whatever the selected `get_cell` method returns for these labels.

    Raises:
      ValueError: if the number of labels differs from the declared count.
    """
    if len(labels) != self._label_length:
      raise ValueError('The {} expects taking {} labels'.format(
          self._metric_name, self._label_length))
    return self._metric_methods[self._label_length].get_cell(
        self._metric, *labels)
150
151
class CounterCell(object):
  """Wraps a single value held by a `Counter`."""

  __slots__ = ["_cell"]

  def __init__(self, cell):
    """Wraps an existing counter cell.

    Args:
      cell: A c pointer of TFE_MonitoringCounterCell.
    """
    self._cell = cell

  def increase_by(self, value):
    """Atomically adds `value` to the cell.

    Args:
      value: non-negative value.
    """
    native_cell = self._cell
    pywrap_tfe.TFE_MonitoringCounterCellIncrementBy(native_cell, value)

  def value(self):
    """Returns the value currently stored in the cell."""
    current = pywrap_tfe.TFE_MonitoringCounterCellValue(self._cell)
    return current
176
177
class Counter(Metric):
  """A cumulative integer metric.

  A Counter holds one value per tuple of labels (or exactly one value when it
  is declared without labels). Users increment those values through the cells
  returned by `get_cell`.
  """

  __slots__ = []

  def __init__(self, name, description, *labels):
    """Creates a new Counter.

    Args:
      name: name of the new metric.
      description: description of the new metric.
      *labels: The label list of the new metric.
    """
    create_args = (name, description) + labels
    super().__init__('Counter', _counter_methods, len(labels), *create_args)

  def get_cell(self, *labels):
    """Returns the `CounterCell` identified by `labels`."""
    raw_cell = super().get_cell(*labels)
    return CounterCell(raw_cell)
202
203
class IntGaugeCell(object):
  """Wraps a single integer value held by an `IntGauge`."""

  __slots__ = ["_cell"]

  def __init__(self, cell):
    """Wraps an existing integer gauge cell.

    Args:
      cell: A c pointer of TFE_MonitoringIntGaugeCell.
    """
    self._cell = cell

  def set(self, value):
    """Atomically overwrites the stored value.

    Args:
      value: integer value.
    """
    native_cell = self._cell
    pywrap_tfe.TFE_MonitoringIntGaugeCellSet(native_cell, value)

  def value(self):
    """Returns the value currently stored in the cell."""
    current = pywrap_tfe.TFE_MonitoringIntGaugeCellValue(self._cell)
    return current
228
229
class IntGauge(Metric):
  """A gauge-like integer metric.

  An IntGauge holds one integer per tuple of labels (or exactly one integer
  when it is declared without labels). Users set those values through the
  cells returned by `get_cell`.
  """

  __slots__ = []

  def __init__(self, name, description, *labels):
    """Creates a new IntGauge.

    Args:
      name: name of the new metric.
      description: description of the new metric.
      *labels: The label list of the new metric.
    """
    create_args = (name, description) + labels
    super().__init__('IntGauge', _int_gauge_methods, len(labels),
                     *create_args)

  def get_cell(self, *labels):
    """Returns the `IntGaugeCell` identified by `labels`."""
    raw_cell = super().get_cell(*labels)
    return IntGaugeCell(raw_cell)
254
255
class StringGaugeCell(object):
  """Wraps a single string value held by a `StringGauge`."""

  __slots__ = ["_cell"]

  def __init__(self, cell):
    """Wraps an existing string gauge cell.

    Args:
      cell: A c pointer of TFE_MonitoringStringGaugeCell.
    """
    self._cell = cell

  def set(self, value):
    """Atomically overwrites the stored value.

    Args:
      value: string value.
    """
    native_cell = self._cell
    pywrap_tfe.TFE_MonitoringStringGaugeCellSet(native_cell, value)

  def value(self):
    """Returns the stored value, decoded from UTF-8 into a str."""
    with c_api_util.tf_buffer() as out_buffer:
      # The binding writes the value into the buffer rather than returning it.
      pywrap_tfe.TFE_MonitoringStringGaugeCellValue(self._cell, out_buffer)
      raw_bytes = pywrap_tf_session.TF_GetBuffer(out_buffer)
      return raw_bytes.decode('utf-8')
283
284
class StringGauge(Metric):
  """A gauge-like string metric.

  A StringGauge holds one string per tuple of labels (or exactly one string
  when it is declared without labels). Users set those values through the
  cells returned by `get_cell`.
  """

  __slots__ = []

  def __init__(self, name, description, *labels):
    """Creates a new StringGauge.

    Args:
      name: name of the new metric.
      description: description of the new metric.
      *labels: The label list of the new metric.
    """
    create_args = (name, description) + labels
    super().__init__('StringGauge', _string_gauge_methods, len(labels),
                     *create_args)

  def get_cell(self, *labels):
    """Returns the `StringGaugeCell` identified by `labels`."""
    raw_cell = super().get_cell(*labels)
    return StringGaugeCell(raw_cell)
309
310
class BoolGaugeCell(object):
  """Wraps a single boolean value held by a `BoolGauge`."""

  __slots__ = ["_cell"]

  def __init__(self, cell):
    """Wraps an existing boolean gauge cell.

    Args:
      cell: A c pointer of TFE_MonitoringBoolGaugeCell.
    """
    self._cell = cell

  def set(self, value):
    """Atomically overwrites the stored value.

    Args:
      value: bool value.
    """
    native_cell = self._cell
    pywrap_tfe.TFE_MonitoringBoolGaugeCellSet(native_cell, value)

  def value(self):
    """Returns the value currently stored in the cell."""
    current = pywrap_tfe.TFE_MonitoringBoolGaugeCellValue(self._cell)
    return current
335
336
@tf_export("__internal__.monitoring.BoolGauge", v1=[])
class BoolGauge(Metric):
  """A gauge-like bool metric.

  A BoolGauge holds one boolean per tuple of labels (or exactly one boolean
  when it is declared without labels). Users set those values through the
  cells returned by `get_cell`.
  """

  __slots__ = []

  def __init__(self, name, description, *labels):
    """Creates a new BoolGauge.

    Args:
      name: name of the new metric.
      description: description of the new metric.
      *labels: The label list of the new metric.
    """
    create_args = (name, description) + labels
    super().__init__('BoolGauge', _bool_gauge_methods, len(labels),
                     *create_args)

  def get_cell(self, *labels):
    """Returns the `BoolGaugeCell` identified by `labels`."""
    raw_cell = super().get_cell(*labels)
    return BoolGaugeCell(raw_cell)
362
363
class SamplerCell(object):
  """Wraps a single value held by a `Sampler`."""

  __slots__ = ["_cell"]

  def __init__(self, cell):
    """Wraps an existing sampler cell.

    Args:
      cell: A c pointer of TFE_MonitoringSamplerCell.
    """
    self._cell = cell

  def add(self, value):
    """Atomically records one sample.

    Args:
      value: float value.
    """
    native_cell = self._cell
    pywrap_tfe.TFE_MonitoringSamplerCellAdd(native_cell, value)

  def value(self):
    """Retrieves the current distribution of samples.

    Returns:
      A HistogramProto describing the distribution of samples.
    """
    with c_api_util.tf_buffer() as out_buffer:
      # The binding writes the serialized histogram into the buffer.
      pywrap_tfe.TFE_MonitoringSamplerCellValue(self._cell, out_buffer)
      serialized = pywrap_tf_session.TF_GetBuffer(out_buffer)
    result = summary_pb2.HistogramProto()
    result.ParseFromString(compat.as_bytes(serialized))
    return result
397
398
class Buckets(object):
  """Bucketing strategies for the samplers.

  Owns the wrapped buckets object: it is deleted through
  `TFE_MonitoringDeleteBuckets` when this instance is finalized.
  """

  __slots__ = ["buckets"]

  def __init__(self, buckets):
    """Creates a new Buckets.

    Args:
      buckets: A c pointer of TFE_MonitoringBuckets.
    """
    self.buckets = buckets

  def __del__(self):
    """Deletes the wrapped buckets, if any were ever attached."""
    try:
      buckets = self.buckets
    except AttributeError:
      # `__init__` never assigned the slot (for example, a subclass failed
      # while building its constructor argument), so there is nothing to free.
      return
    pywrap_tfe.TFE_MonitoringDeleteBuckets(buckets)
414
415
class ExponentialBuckets(Buckets):
  """Exponential bucketing strategy.

  Sets up buckets of the form:
      [-DBL_MAX, ..., scale * growth_factor^i,
       scale * growth_factor^(i + 1), ..., DBL_MAX].
  """

  __slots__ = []

  def __init__(self, scale, growth_factor, bucket_count):
    """Creates a new exponential Buckets.

    Args:
      scale: float
      growth_factor: float
      bucket_count: integer
    """
    native_buckets = pywrap_tfe.TFE_MonitoringNewExponentialBuckets(
        scale, growth_factor, bucket_count)
    super().__init__(native_buckets)
437
438
class Sampler(Metric):
  """A cumulative histogram metric.

  A Sampler holds one histogram per tuple of labels (or exactly one histogram
  when it is declared without labels), configured with a list of increasing
  bucket boundaries. Users add samples through the cells returned by
  `get_cell`.
  """

  __slots__ = []

  def __init__(self, name, buckets, description, *labels):
    """Creates a new Sampler.

    Args:
      name: name of the new metric.
      buckets: bucketing strategy of the new metric.
      description: description of the new metric.
      *labels: The label list of the new metric.
    """
    # The create method receives the object wrapped by `buckets`, not the
    # wrapper itself.
    create_args = (name, buckets.buckets, description) + labels
    super().__init__('Sampler', _sampler_methods, len(labels), *create_args)

  def get_cell(self, *labels):
    """Returns the `SamplerCell` identified by `labels`."""
    raw_cell = super().get_cell(*labels)
    return SamplerCell(raw_cell)
465
466
class MonitoredTimer(object):
  """A context manager to measure the walltime and increment a Counter cell.

  On exit, the time elapsed since entry is added to the cell in whole
  microseconds. The elapsed time is taken from `time.perf_counter()` rather
  than `time.time()`, so a system clock adjustment during the block cannot
  produce a negative increment.
  """

  __slots__ = ["cell", "t"]

  def __init__(self, cell):
    """Creates a new MonitoredTimer.

    Args:
      cell: the cell associated with the time metric that will be incremented.
    """
    self.cell = cell

  def __enter__(self):
    """Records the start time and returns this timer."""
    self.t = time.perf_counter()
    return self

  def __exit__(self, exception_type, exception_value, traceback):
    """Adds the elapsed microseconds to the cell; never suppresses errors."""
    del exception_type, exception_value, traceback
    micro_seconds = (time.perf_counter() - self.t) * 1000000
    self.cell.increase_by(int(micro_seconds))
488
489
def monitored_timer(cell):
  """A function decorator for adding MonitoredTimer support.

  Args:
    cell: the cell associated with the time metric that will be incremented.
  Returns:
    A decorator that measures the function runtime and increments the
    specified counter cell.
  """

  def actual_decorator(func):

    def timed_call(*args, **kwargs):
      # Every call runs inside its own timer; the timer exits before the
      # result is handed back to the caller.
      with MonitoredTimer(cell):
        result = func(*args, **kwargs)
      return result

    return functools.wraps(func)(timed_call)

  return actual_decorator
510