# Owner(s): ["module: cuda"] import sys import unittest import unittest.mock import torch import torch.cuda._gpu_trace as gpu_trace from torch.testing._internal.common_utils import NoTest, run_tests, TEST_CUDA, TestCase # NOTE: Each test needs to be run in a brand new process, to reset the registered hooks # and make sure the CUDA streams are initialized for each test that uses them. if not TEST_CUDA: print("CUDA not available, skipping tests", file=sys.stderr) TestCase = NoTest # noqa: F811 @torch.testing._internal.common_utils.markDynamoStrictTest class TestCudaTrace(TestCase): def setUp(self): torch._C._activate_gpu_trace() self.mock = unittest.mock.MagicMock() def test_event_creation_callback(self): gpu_trace.register_callback_for_event_creation(self.mock) event = torch.cuda.Event() event.record() self.mock.assert_called_once_with(event._as_parameter_.value) def test_event_deletion_callback(self): gpu_trace.register_callback_for_event_deletion(self.mock) event = torch.cuda.Event() event.record() event_id = event._as_parameter_.value del event self.mock.assert_called_once_with(event_id) def test_event_record_callback(self): gpu_trace.register_callback_for_event_record(self.mock) event = torch.cuda.Event() event.record() self.mock.assert_called_once_with( event._as_parameter_.value, torch.cuda.default_stream().cuda_stream ) def test_event_wait_callback(self): gpu_trace.register_callback_for_event_wait(self.mock) event = torch.cuda.Event() event.record() event.wait() self.mock.assert_called_once_with( event._as_parameter_.value, torch.cuda.default_stream().cuda_stream ) def test_memory_allocation_callback(self): gpu_trace.register_callback_for_memory_allocation(self.mock) tensor = torch.empty(10, 4, device="cuda") self.mock.assert_called_once_with(tensor.data_ptr()) def test_memory_deallocation_callback(self): gpu_trace.register_callback_for_memory_deallocation(self.mock) tensor = torch.empty(3, 8, device="cuda") data_ptr = tensor.data_ptr() del tensor self.mock.assert_called_once_with(data_ptr) def test_stream_creation_callback(self): gpu_trace.register_callback_for_stream_creation(self.mock) # see Note [HIP Lazy Streams] if torch.version.hip: user_stream = torch.cuda.Stream() with torch.cuda.stream(user_stream): tensor = torch.ones(5, device="cuda") else: torch.cuda.Stream() self.mock.assert_called() def test_device_synchronization_callback(self): gpu_trace.register_callback_for_device_synchronization(self.mock) torch.cuda.synchronize() self.mock.assert_called() def test_stream_synchronization_callback(self): gpu_trace.register_callback_for_stream_synchronization(self.mock) stream = torch.cuda.Stream() stream.synchronize() self.mock.assert_called_once_with(stream.cuda_stream) def test_event_synchronization_callback(self): gpu_trace.register_callback_for_event_synchronization(self.mock) event = torch.cuda.Event() event.record() event.synchronize() self.mock.assert_called_once_with(event._as_parameter_.value) def test_memcpy_synchronization(self): gpu_trace.register_callback_for_stream_synchronization(self.mock) tensor = torch.rand(5, device="cuda") tensor.nonzero() self.mock.assert_called_once_with(torch.cuda.default_stream().cuda_stream) def test_all_trace_callbacks_called(self): other = unittest.mock.MagicMock() gpu_trace.register_callback_for_memory_allocation(self.mock) gpu_trace.register_callback_for_memory_allocation(other) tensor = torch.empty(10, 4, device="cuda") self.mock.assert_called_once_with(tensor.data_ptr()) other.assert_called_once_with(tensor.data_ptr()) if __name__ == 
"__main__": run_tests()