xref: /aosp_15_r20/external/tensorflow/tensorflow/python/debug/lib/debug_events_monitors_test.py (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
"""Tests for the debug events monitors (BaseMonitor and InfNanMonitor)."""
16from absl.testing import parameterized
17
18import numpy as np
19
20from tensorflow.core.protobuf import debug_event_pb2
21from tensorflow.python.debug.lib import debug_events_monitors
22from tensorflow.python.debug.lib import debug_events_reader
23from tensorflow.python.debug.lib import dumping_callback
24from tensorflow.python.debug.lib import dumping_callback_test_lib
25from tensorflow.python.eager import def_function
26from tensorflow.python.framework import constant_op
27from tensorflow.python.framework import dtypes
28from tensorflow.python.framework import ops
29from tensorflow.python.framework import test_util
30from tensorflow.python.ops import array_ops
31from tensorflow.python.ops import math_ops
32from tensorflow.python.platform import googletest
33from tensorflow.python.platform import test
34
35
class TestMonitor(debug_events_monitors.BaseMonitor):
  """Monitor that stores every data object it is handed, keyed by index.

  The tests below inspect `executions` and `graph_execution_traces` to check
  which callbacks were invoked and with what arguments.
  """

  def __init__(self, debug_data_reader):
    super().__init__(debug_data_reader)
    # Execution data objects, keyed by execution index.
    self.executions = {}
    # GraphExecutionTrace data objects, keyed by graph execution trace index.
    self.graph_execution_traces = {}

  def on_execution(self, execution_index, execution):
    """Records `execution`; a repeated index raises ValueError."""
    seen = self.executions
    if execution_index in seen:
      raise ValueError("Duplicate execution index: %d" % execution_index)
    seen[execution_index] = execution

  def on_graph_execution_trace(self, graph_execution_trace_index,
                               graph_execution_trace):
    """Records `graph_execution_trace`; a repeated index raises ValueError."""
    seen = self.graph_execution_traces
    if graph_execution_trace_index in seen:
      raise ValueError("Duplicate graph-execution-trace index: %d" %
                       graph_execution_trace_index)
    seen[graph_execution_trace_index] = graph_execution_trace
57
58
class DebugEventsMonitorTest(dumping_callback_test_lib.DumpingCallbackTestBase,
                             parameterized.TestCase):
  """Dumps debug info from real ops, then replays it through a TestMonitor."""

  @parameterized.named_parameters(
      ("NoTensor", "NO_TENSOR"),
      ("ConciseHealth", "CONCISE_HEALTH"),
      ("FullHealth", "FULL_HEALTH"),
      ("FullTensor", "FULL_TENSOR"),
  )
  def testOnExecutionIsCalled(self, tensor_debug_mode):
    """An eager MatMul shows up as exactly one on_execution() callback."""
    x = constant_op.constant([[1, 2], [3, 4]], dtype=dtypes.float32)
    y = constant_op.constant([[-1], [1]], dtype=dtypes.float32)
    writer = dumping_callback.enable_dump_debug_info(
        self.dump_root, tensor_debug_mode=tensor_debug_mode)
    math_ops.matmul(x, y)
    writer.FlushNonExecutionFiles()
    writer.FlushExecutionFiles()

    # Length of the per-tensor debug vector in the two health modes.
    #   CONCISE_HEALTH: [tensor_id, element_count, neg_inf_count,
    #                    pos_inf_count, nan_count].
    #   FULL_HEALTH: [tensor_id, device_id, dtype, rank, element_count,
    #                 neg_inf_count, pos_inf_count, nan_count,
    #                 neg_finite_count, zero_count, pos_finite_count].
    health_vector_len = {"CONCISE_HEALTH": 5, "FULL_HEALTH": 11}

    with debug_events_reader.DebugDataReader(self.dump_root) as reader:
      test_monitor = TestMonitor(reader)
      reader.update()
      self.assertLen(test_monitor.executions, 1)
      self.assertEmpty(test_monitor.graph_execution_traces)

      execution = test_monitor.executions[0]
      self.assertTrue(execution.wall_time)
      self.assertEqual(execution.op_type, "MatMul")
      self.assertLen(execution.output_tensor_device_ids, 1)
      self.assertLen(execution.input_tensor_ids, 2)
      self.assertLen(execution.output_tensor_ids, 1)
      self.assertEqual(execution.num_outputs, 1)
      self.assertEqual(execution.graph_id, "")

      if tensor_debug_mode in health_vector_len:
        self.assertLen(execution.debug_tensor_values, 1)
        self.assertLen(execution.debug_tensor_values[0],
                       health_vector_len[tensor_debug_mode])
      elif tensor_debug_mode == "NO_TENSOR":
        self.assertIsNone(execution.debug_tensor_values)
      elif tensor_debug_mode == "FULL_TENSOR":
        # Full tensor values are not stored in the debug_tensor_values field.
        self.assertIsNone(execution.debug_tensor_values)
        self.assertAllClose(
            reader.execution_to_tensor_values(execution), [[[1.], [1.]]])

  @parameterized.named_parameters(
      ("ConciseHealth", "CONCISE_HEALTH"),
      ("FullHealth", "FULL_HEALTH"),
      ("FullTensor", "FULL_TENSOR"),
  )
  def testOnGraphExecutionTraceIsCalled(self, tensor_debug_mode):
    """A tf.function call yields one execution plus per-tensor graph traces."""
    xs = constant_op.constant([2., 6., 8., 1., 2.], dtype=dtypes.float32)
    writer = dumping_callback.enable_dump_debug_info(
        self.dump_root, tensor_debug_mode=tensor_debug_mode)

    @def_function.function
    def unique_sum(xs):
      """Sum over the unique values, for testing."""
      unique_xs, indices = array_ops.unique(xs)
      return math_ops.reduce_sum(unique_xs), indices

    unique_sum(xs)
    writer.FlushNonExecutionFiles()
    writer.FlushExecutionFiles()

    with debug_events_reader.DebugDataReader(self.dump_root) as reader:
      test_monitor = TestMonitor(reader)
      reader.update()
      self.assertLen(test_monitor.executions, 1)

      execution = test_monitor.executions[0]
      self.assertTrue(execution.wall_time)
      self.assertStartsWith(execution.op_type, "__inference_unique_sum")
      self.assertLen(execution.output_tensor_device_ids, 2)
      self.assertLen(execution.input_tensor_ids, 1)
      self.assertLen(execution.output_tensor_ids, 2)
      self.assertEqual(execution.num_outputs, 2)
      self.assertTrue(execution.graph_id)

      traces = test_monitor.graph_execution_traces
      if tensor_debug_mode in ("CONCISE_HEALTH", "FULL_HEALTH"):
        # CONCISE_HEALTH vector (5 elements):
        #   [tensor_id, element_count, neg_inf_count, pos_inf_count,
        #    nan_count].
        # FULL_HEALTH vector (11 elements):
        #   [tensor_id, device_id, dtype, rank, element_count,
        #    neg_inf_count, pos_inf_count, nan_count,
        #    neg_finite_count, zero_count, pos_finite_count].
        vector_len = 5 if tensor_debug_mode == "CONCISE_HEALTH" else 11
        # Unique:1 is not traced under the health modes, as it's int-dtype,
        # which leaves [Placeholder:0, Unique:0, Sum:0].
        traced_op_types = ("Placeholder", "Unique", "Sum")
        self.assertLen(traces, 3)
        for trace_index, op_type in enumerate(traced_op_types):
          self.assertEqual(traces[trace_index].op_type, op_type)
          self.assertEqual(traces[trace_index].output_slot, 0)
        for trace_index in range(3):
          self.assertLen(traces[trace_index].debug_tensor_value, vector_len)
      elif tensor_debug_mode == "FULL_TENSOR":
        # One row per expected trace, in order:
        #   (op_type, output_slot, comparison method, expected tensor value).
        # [Placeholder:0, Unique:0, Unique:1, Const:0, Sum:0].
        expected_traces = (
            ("Placeholder", 0, self.assertAllEqual, [2., 6., 8., 1., 2.]),
            ("Unique", 0, self.assertAllEqual, [2., 6., 8., 1.]),
            ("Unique", 1, self.assertAllEqual, [0, 1, 2, 3, 0]),
            ("Const", 0, self.assertAllClose, [0]),
            ("Sum", 0, self.assertAllClose, 17.),
        )
        self.assertLen(traces, 5)
        for trace_index, row in enumerate(expected_traces):
          op_type, output_slot, compare, expected_value = row
          trace = traces[trace_index]
          self.assertEqual(trace.op_type, op_type)
          self.assertEqual(trace.output_slot, output_slot)
          # Full tensor values live outside the debug_tensor_value field.
          self.assertIsNone(trace.debug_tensor_value)
          compare(
              reader.graph_execution_trace_to_tensor_value(trace),
              expected_value)
202
203
class AlertDataObjectsTest(test_util.TensorFlowTestCase):
  """Unit tests for alert-class objects."""

  def testInfNanMonitor(self):
    """InfNanAlert exposes each constructor argument as an attribute."""
    alert = debug_events_monitors.InfNanAlert(
        1234,
        "FooOp",
        1,
        size=1000,
        num_neg_inf=5,
        num_pos_inf=10,
        num_nan=20,
        execution_index=777,
        graph_execution_trace_index=888)

    # Attribute name -> value that was passed to the constructor above.
    expected_attributes = (
        ("wall_time", 1234),
        ("op_type", "FooOp"),
        ("output_slot", 1),
        ("size", 1000),
        ("num_neg_inf", 5),
        ("num_pos_inf", 10),
        ("num_nan", 20),
        ("execution_index", 777),
        ("graph_execution_trace_index", 888),
    )
    for attribute_name, expected_value in expected_attributes:
      self.assertEqual(getattr(alert, attribute_name), expected_value)
227
228
class InfNanMonitorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
  """Unit tests for InfNanMonitor.

  Each test feeds hand-built Execution / GraphExecutionTrace data objects
  straight into the monitor's callbacks; the reader is a MagicMock.
  """

  def testInfNanMonitorStartsWithEmptyAlerts(self):
    """A freshly constructed monitor has no alerts."""
    mock_reader = test.mock.MagicMock()
    monitor = debug_events_monitors.InfNanMonitor(mock_reader)
    self.assertEmpty(monitor.alerts())

  def testInfNanMonitorOnExecutionUnderCurtHealthMode(self):
    """CURT_HEALTH execution: only the flagged output slot raises an alert."""
    mock_reader = test.mock.MagicMock()
    monitor = debug_events_monitors.InfNanMonitor(mock_reader)
    execution_digest = debug_events_reader.ExecutionDigest(
        1234, 1, "FooOp", output_tensor_device_ids=[0, 1])
    execution = debug_events_reader.Execution(
        execution_digest,
        "worker01", ["a1", "b2", "e3"],
        debug_event_pb2.TensorDebugMode.CURT_HEALTH,
        graph_id=None,
        input_tensor_ids=[12, 34],
        output_tensor_ids=[56, 78],
        debug_tensor_values=[[-1, 0], [-1, 1]])  # [tensor_id, any_inf_nan].
    monitor.on_execution(50, execution)

    self.assertLen(monitor.alerts(), 1)
    alert = monitor.alerts()[0]
    self.assertEqual(alert.wall_time, 1234)
    self.assertEqual(alert.op_type, "FooOp")
    self.assertEqual(alert.output_slot, 1)
    # The four fields below are unavailable under CURT_HEALTH mode by design.
    self.assertIsNone(alert.size)
    self.assertIsNone(alert.num_neg_inf)
    self.assertIsNone(alert.num_pos_inf)
    self.assertIsNone(alert.num_nan)
    self.assertEqual(alert.execution_index, 50)
    self.assertIsNone(alert.graph_execution_trace_index)

  @parameterized.named_parameters(
      ("ConciseHealth",
       debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
       # [tensor_id, size, num_neg_inf, num_pos_inf, num_nan].
       [[-1, 10, 1, 2, 3],
        [-1, 100, 0, 0, 0]]),
      ("FullHealth",
       debug_event_pb2.TensorDebugMode.FULL_HEALTH,
       # [tensor_id, device_id, dtype, rank, element_count,
       #  neg_inf_count, pos_inf_count, nan_count,
       #  neg_finite_count, zero_count, pos_finite_count].
       [[-1, -1, 1, 1, 10, 1, 2, 3, 0, 0, 0],
        [-1, -1, 1, 1, 100, 0, 0, 0, 10, 30, 60]]),
  )
  def testInfNanMonitorOnExecutionUnderHealthMode(self,
                                                  tensor_debug_mode,
                                                  debug_tensor_values):
    """Health-mode execution: the alert carries the inf/nan counts of slot 0.

    In both parameter sets only the first output has nonzero inf/nan counts,
    so exactly one alert (for output slot 0) is expected.
    """
    mock_reader = test.mock.MagicMock()
    monitor = debug_events_monitors.InfNanMonitor(mock_reader)
    execution_digest = debug_events_reader.ExecutionDigest(
        1234, 1, "BarOp", output_tensor_device_ids=[0, 1])

    execution = debug_events_reader.Execution(
        execution_digest,
        "worker01",
        ["a1", "b2", "e3"],
        tensor_debug_mode,
        graph_id=None,
        input_tensor_ids=[12, 34],
        output_tensor_ids=[56, 78],
        debug_tensor_values=debug_tensor_values)
    monitor.on_execution(60, execution)

    self.assertLen(monitor.alerts(), 1)
    alert = monitor.alerts()[0]
    self.assertEqual(alert.wall_time, 1234)
    self.assertEqual(alert.op_type, "BarOp")
    self.assertEqual(alert.output_slot, 0)
    self.assertEqual(alert.size, 10)
    self.assertEqual(alert.num_neg_inf, 1)
    self.assertEqual(alert.num_pos_inf, 2)
    self.assertEqual(alert.num_nan, 3)
    self.assertEqual(alert.execution_index, 60)
    self.assertIsNone(alert.graph_execution_trace_index)

  @parameterized.named_parameters(
      ("Shape",
       debug_event_pb2.TensorDebugMode.SHAPE,
       # [tensor_id, dtype, rank, element_count, ...shape_truncate_6]
       [[-1, 1, 2, 6, 3, 2, 0, 0, 0, 0],
        [-1, 10, 1, 7, 7, 0, 0, 0, 0, 0]]),
  )
  def testInfNanMonitorOnExecutionUnderModeWithNoInfNanInfo(
      self,
      tensor_debug_mode,
      debug_tensor_values):
    """A debug mode without inf/nan information never produces alerts."""
    mock_reader = test.mock.MagicMock()
    monitor = debug_events_monitors.InfNanMonitor(mock_reader)
    execution_digest = debug_events_reader.ExecutionDigest(
        1234, 1, "BarOp", output_tensor_device_ids=[0, 1])

    execution = debug_events_reader.Execution(
        execution_digest,
        "worker01",
        ["a1", "b2", "e3"],
        tensor_debug_mode,
        graph_id=None,
        input_tensor_ids=[12, 34],
        output_tensor_ids=[56, 78],
        debug_tensor_values=debug_tensor_values)
    monitor.on_execution(60, execution)

    self.assertEmpty(monitor.alerts())

  @parameterized.named_parameters(
      ("FloatsScalarWithInfAndNan", np.inf, np.float32, 1, 0, 1, 0),
      ("Floats2DWithInfAndNan", [[0, np.nan, np.nan, -np.inf]
                                ], np.float32, 4, 1, 0, 2),
      ("Floats1DWithoutInfOrNan", [0, -1e6, 1e6, 9e5], np.float32, 4, 0, 0, 0),
      ("Integers", [[0, 1000, -200, -300]], np.int32, 4, 0, 0, 0),
      # Use a real boolean dtype so this case covers boolean tensors rather
      # than repeating the integer case.
      ("Booleans", [False, True, False, False], np.bool_, 4, 0, 0, 0),
  )
  def testInfNanMonitorOnExecutionUnderFullTensorModeWorks(
      self, tensor_value, dtype, expected_size, expected_num_neg_inf,
      expected_num_pos_inf, expected_num_nan):
    """FULL_TENSOR execution: counts come from the reader's tensor values.

    The mocked reader returns a clean tensor for output slot 0 and the
    parameterized tensor for output slot 1, so any alert must be for slot 1.
    """
    mock_reader = test.mock.MagicMock()
    mock_reader.execution_to_tensor_values.return_value = [
        np.array([[0.0, -1.0, 1.0]]),
        np.array(tensor_value, dtype=dtype)
    ]
    monitor = debug_events_monitors.InfNanMonitor(mock_reader)
    execution_digest = debug_events_reader.ExecutionDigest(
        1234,
        1,
        "__inference_bar_function_1234",
        output_tensor_device_ids=[0, 1])
    execution = debug_events_reader.Execution(
        execution_digest,
        "worker01", ["a1", "b2", "e3"],
        debug_event_pb2.TensorDebugMode.FULL_TENSOR,
        graph_id=None,
        input_tensor_ids=[12, 34],
        output_tensor_ids=[56, 78])
    monitor.on_execution(70, execution)

    if expected_num_neg_inf or expected_num_pos_inf or expected_num_nan:
      self.assertLen(monitor.alerts(), 1)
      alert = monitor.alerts()[0]
      self.assertEqual(alert.wall_time, 1234)
      self.assertEqual(alert.op_type, "__inference_bar_function_1234")
      self.assertEqual(alert.output_slot, 1)
      self.assertEqual(alert.size, expected_size)
      self.assertEqual(alert.num_neg_inf, expected_num_neg_inf)
      self.assertEqual(alert.num_pos_inf, expected_num_pos_inf)
      self.assertEqual(alert.num_nan, expected_num_nan)
      self.assertEqual(alert.execution_index, 70)
      # assertIsNone takes (obj, msg=None); no expected value belongs here.
      self.assertIsNone(alert.graph_execution_trace_index)
    else:
      self.assertEmpty(monitor.alerts())

  def testInfNaNMonitorOnGraphExecutionTraceCurtHealthMode(self):
    """CURT_HEALTH graph trace with any_inf_nan == 1 raises one alert."""
    mock_reader = test.mock.MagicMock()
    monitor = debug_events_monitors.InfNanMonitor(mock_reader)
    trace_digest = debug_events_reader.GraphExecutionTraceDigest(
        1234, 1, "FooOp", "FooOp_1", 2, "g1")
    trace = debug_events_reader.GraphExecutionTrace(
        trace_digest, ["g0", "g1"],
        debug_event_pb2.TensorDebugMode.CURT_HEALTH,
        debug_tensor_value=[9, 1])  # [tensor_id, any_inf_nan].
    monitor.on_graph_execution_trace(55, trace)
    self.assertLen(monitor.alerts(), 1)
    alert = monitor.alerts()[0]
    self.assertEqual(alert.wall_time, 1234)
    self.assertEqual(alert.op_type, "FooOp")
    self.assertEqual(alert.output_slot, 2)
    # The four fields below are unavailable under CURT_HEALTH mode by design.
    self.assertIsNone(alert.size)
    self.assertIsNone(alert.num_neg_inf)
    self.assertIsNone(alert.num_pos_inf)
    self.assertIsNone(alert.num_nan)
    self.assertIsNone(alert.execution_index)
    self.assertEqual(alert.graph_execution_trace_index, 55)

  def testInfNaNMonitorOnGraphExecutionTraceConciseHealthMode(self):
    """CONCISE_HEALTH graph trace: the alert carries the dumped counts."""
    mock_reader = test.mock.MagicMock()
    monitor = debug_events_monitors.InfNanMonitor(mock_reader)
    trace_digest = debug_events_reader.GraphExecutionTraceDigest(
        1234, 1, "FooOp", "FooOp_1", 2, "g1")
    trace = debug_events_reader.GraphExecutionTrace(
        trace_digest,
        ["g0", "g1"],
        debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
        # [tensor_id, size, num_neg_inf, num_pos_inf, num_nan].
        debug_tensor_value=[9, 100, 3, 2, 1])
    monitor.on_graph_execution_trace(55, trace)

    self.assertLen(monitor.alerts(), 1)
    alert = monitor.alerts()[0]
    self.assertEqual(alert.wall_time, 1234)
    self.assertEqual(alert.op_type, "FooOp")
    self.assertEqual(alert.output_slot, 2)
    self.assertEqual(alert.size, 100)
    self.assertEqual(alert.num_neg_inf, 3)
    self.assertEqual(alert.num_pos_inf, 2)
    self.assertEqual(alert.num_nan, 1)
    # Same check as the other graph-trace tests: the alert came from a graph
    # trace, so it has no execution index.
    self.assertIsNone(alert.execution_index)
    self.assertEqual(alert.graph_execution_trace_index, 55)

  @parameterized.named_parameters(
      ("FloatsScalarWithInfAndNan", np.inf, np.float32, 1, 0, 1, 0),
      ("Floats2DWithInfAndNan", [[0, np.nan, np.nan, -np.inf]
                                ], np.float32, 4, 1, 0, 2),
      ("Floats1DWithoutInfOrNan", [0, -1e6, 1e6, 9e5], np.float32, 4, 0, 0, 0),
      ("Integers", [[0, 1000, -200, -300]], np.int32, 4, 0, 0, 0),
      # Use a real boolean dtype so this case covers boolean tensors rather
      # than repeating the integer case.
      ("Booleans", [False, True, False, False], np.bool_, 4, 0, 0, 0),
  )
  def testInfNanMonitorOnGraphExecutionTraceUnderFullTensorModeWorks(
      self, tensor_value, dtype, expected_size, expected_num_neg_inf,
      expected_num_pos_inf, expected_num_nan):
    """FULL_TENSOR graph trace: counts come from the reader's tensor value."""
    mock_reader = test.mock.MagicMock()
    mock_reader.graph_execution_trace_to_tensor_value.return_value = np.array(
        tensor_value, dtype=dtype)
    monitor = debug_events_monitors.InfNanMonitor(mock_reader)
    trace_digest = debug_events_reader.GraphExecutionTraceDigest(
        1234, 1, "BazOp", "name_scope_3/BazOp_1", 2, "g1")
    trace = debug_events_reader.GraphExecutionTrace(
        trace_digest, ["g0", "g1"], debug_event_pb2.TensorDebugMode.FULL_TENSOR)
    monitor.on_graph_execution_trace(80, trace)

    if expected_num_neg_inf or expected_num_pos_inf or expected_num_nan:
      self.assertLen(monitor.alerts(), 1)
      alert = monitor.alerts()[0]
      self.assertEqual(alert.wall_time, 1234)
      self.assertEqual(alert.op_type, "BazOp")
      self.assertEqual(alert.output_slot, 2)
      self.assertEqual(alert.size, expected_size)
      self.assertEqual(alert.num_neg_inf, expected_num_neg_inf)
      self.assertEqual(alert.num_pos_inf, expected_num_pos_inf)
      self.assertEqual(alert.num_nan, expected_num_nan)
      self.assertIsNone(alert.execution_index)
      self.assertEqual(alert.graph_execution_trace_index, 80)
    else:
      self.assertEmpty(monitor.alerts())

  def testLimitingInfNanMonitorAlertCountWorks(self):
    """With limit=3, only the first three of ten alerting executions count."""
    mock_reader = test.mock.MagicMock()
    monitor = debug_events_monitors.InfNanMonitor(mock_reader, limit=3)
    for i in range(10):
      # wall_time is i * 1000, so each alert can be traced to its execution.
      execution_digest = debug_events_reader.ExecutionDigest(
          i * 1000, 1, "FooOp", output_tensor_device_ids=[0, 1])
      execution = debug_events_reader.Execution(
          execution_digest,
          "worker01", ["a1", "b2", "e3"],
          debug_event_pb2.TensorDebugMode.CURT_HEALTH,
          graph_id=None,
          input_tensor_ids=[12, 34],
          output_tensor_ids=[56, 78],
          debug_tensor_values=[[-1, 0], [-1, 1]])  # [tensor_id, any_inf_nan].
      monitor.on_execution(i, execution)

    alerts = monitor.alerts()
    self.assertLen(alerts, 3)
    for i, alert in enumerate(alerts):
      self.assertEqual(alert.wall_time, i * 1000)
      self.assertEqual(alert.op_type, "FooOp")
      self.assertEqual(alert.output_slot, 1)
      # The four fields below are unavailable under CURT_HEALTH mode by design.
      self.assertIsNone(alert.size)
      self.assertIsNone(alert.num_neg_inf)
      self.assertIsNone(alert.num_pos_inf)
      self.assertIsNone(alert.num_nan)
      self.assertEqual(alert.execution_index, i)
      self.assertIsNone(alert.graph_execution_trace_index)
496
497
if __name__ == "__main__":
  # Eager execution is switched on first, then the test runner takes over.
  ops.enable_eager_execution()
  googletest.main()
501