# Owner(s): ["module: unknown"]

import unittest

import torch
from torch.testing._internal.autocast_test_lists import (
    AutocastCPUTestLists,
    TestAutocast,
)
from torch.testing._internal.common_utils import (
    IS_WINDOWS,
    run_tests,
    skipIfTorchDynamo,
    TestCase,
)
from torch.utils._python_dispatch import TorchDispatchMode


class TestAutocastCPU(TestAutocast):
    def setUp(self):
        super().setUp()
        self.autocast_lists = AutocastCPUTestLists(torch.device("cpu"))

    def tearDown(self):
        del self.autocast_lists
        super().tearDown()

    @skipIfTorchDynamo()
    def test_autocast_torch_expect_builtin_promote(self):
        for (
            op,
            args1,
            args2,
            out_type,
        ) in self.autocast_lists.torch_expect_builtin_promote:
            self._run_autocast_outofplace(
                op, args1, torch.float32, device="cpu", out_type=out_type
            )
            self._run_autocast_outofplace(
                op,
                args2,
                torch.float32,
                device="cpu",
                out_type=out_type,
                amp_dtype=torch.float16,
            )

    @skipIfTorchDynamo()
    def test_autocast_methods_expect_builtin_promote(self):
        for (
            op,
            args1,
            args2,
            out_type,
        ) in self.autocast_lists.methods_expect_builtin_promote:
            self._run_autocast_outofplace(
                op, args1, torch.float32, device="cpu", module=None, out_type=out_type
            )
            self._run_autocast_outofplace(
                op,
                args2,
                torch.float32,
                device="cpu",
                module=None,
                out_type=out_type,
                amp_dtype=torch.float16,
            )

    @skipIfTorchDynamo()
    def test_autocast_torch_16(self):
        for op_with_args in self.autocast_lists.torch_16:
            op, args, maybe_kwargs = self.args_maybe_kwargs(op_with_args)
            self._run_autocast_outofplace(
                op, args, torch.bfloat16, device="cpu", add_kwargs=maybe_kwargs
            )
            self._run_autocast_outofplace(
                op,
                args,
                torch.float16,
                device="cpu",
                add_kwargs=maybe_kwargs,
                amp_dtype=torch.float16,
            )

    @skipIfTorchDynamo()
    def test_autocast_nn_16(self):
        for op_with_args in self.autocast_lists.nn_16:
            op, args, maybe_kwargs = self.args_maybe_kwargs(op_with_args)
            self._run_autocast_outofplace(
                op,
                args,
                torch.bfloat16,
                device="cpu",
                module=torch._C._nn,
                add_kwargs=maybe_kwargs,
            )
            self._run_autocast_outofplace(
                op,
                args,
                torch.float16,
                device="cpu",
                module=torch._C._nn,
                add_kwargs=maybe_kwargs,
                amp_dtype=torch.float16,
            )

    @skipIfTorchDynamo()
    def test_autocast_torch_fp32(self):
        for op_with_args in self.autocast_lists.torch_fp32:
            op, args, maybe_kwargs = self.args_maybe_kwargs(op_with_args)
            self._run_autocast_outofplace(
                op, args, torch.float32, device="cpu", add_kwargs=maybe_kwargs
            )
            self._run_autocast_outofplace(
                op,
                args,
                torch.float32,
                device="cpu",
                add_kwargs=maybe_kwargs,
                amp_dtype=torch.float16,
            )

    @skipIfTorchDynamo()
    def test_autocast_nn_fp32(self):
        for op_with_args in self.autocast_lists.nn_fp32:
            op, args, maybe_kwargs = self.args_maybe_kwargs(op_with_args)
            self._run_autocast_outofplace(
                op,
                args,
                torch.float32,
                device="cpu",
                module=torch._C._nn,
                add_kwargs=maybe_kwargs,
            )
            self._run_autocast_outofplace(
                op,
                args,
                torch.float32,
                device="cpu",
                module=torch._C._nn,
                add_kwargs=maybe_kwargs,
                amp_dtype=torch.float16,
            )

    @skipIfTorchDynamo()
    def test_autocast_torch_need_autocast_promote(self):
        for op, args1, args2 in self.autocast_lists.torch_need_autocast_promote:
            self._run_autocast_outofplace(op, args1, torch.float32, device="cpu")
            self._run_autocast_outofplace(
                op, args2, torch.float32, device="cpu", amp_dtype=torch.float16
            )

    @unittest.skipIf(IS_WINDOWS, "Limit support for bf16 path")
    def test_autocast_rnn(self):
        if (
            torch.backends.mkldnn.is_available()
            and torch.ops.mkldnn._is_mkldnn_bf16_supported()
        ):
            x = torch.randn(1, 2, 1)
            hx = torch.randn(2, 2, 1)
            cx = torch.randn(2, 2, 1)

            m = torch.nn.LSTM(1, 1, 2).to(torch.bfloat16)

            # Raise ValueError when autocast is not enabled
            with self.assertRaisesRegex(ValueError, "input must have the type"):
                m(x, (hx, cx))

            # Should be able to run the below case with autocast
            with torch.amp.autocast(device_type="cpu"):
                m(x, (hx, cx))

    def test_autocast_disabled_with_fp32_dtype(self):
        with torch.autocast(device_type="cpu", dtype=torch.float32, enabled=False):
            _ = torch.ones(10)

    def test_generic_autocast(self):
        for op_with_args in self.autocast_lists.torch_16:
            op, args, maybe_kwargs = self.args_maybe_kwargs(op_with_args)
            with torch.amp.autocast(device_type="cpu"):
                generic_autocast_output = getattr(torch, op)(*args, **maybe_kwargs)
            with torch.amp.autocast(device_type="cpu"):
                cpu_autocast_output = getattr(torch, op)(*args, **maybe_kwargs)
            self.assertEqual(generic_autocast_output, cpu_autocast_output)

    def test_cpu_autocast_deprecated_warning(self):
        with self.assertWarnsRegex(
            FutureWarning,
            r"`torch.cpu.amp.autocast\(args...\)` is deprecated. Please use `torch.amp.autocast\('cpu', args...\)` instead.",
        ):
            with torch.cpu.amp.autocast():
                _ = torch.ones(10)


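# Autograd Function used by the autocast cache tests below. Its backward
# intentionally runs its matmuls inside an autocast region, so the fp32 weight
# needs a float16 copy in the backward pass as well as in the forward pass.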
class CustomLinear(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, w_t):
        ctx.save_for_backward(x, w_t)
        return torch.nn.functional.linear(x, w_t)

    @staticmethod
    def backward(ctx, grad_output):
        x, w_t = ctx.saved_tensors
        with torch.autocast(device_type="cuda"):
            dL_dX = torch.matmul(grad_output, w_t)
            dL_dW = torch.matmul(x.transpose(0, 1), grad_output).transpose(0, 1)
        return dL_dX, dL_dW


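# Dispatch mode that counts how many times `weight` is cast to float16, i.e.
# how many aten._to_copy calls target it with dtype=torch.float16.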
class WeightDTypeCastCounterMode(TorchDispatchMode):
    def __init__(self, weight):
        super().__init__()
        self.dtype_cast_counter = 0
        self.weight = weight

    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
        if (
            func is torch.ops.aten._to_copy.default
            and args[0] is self.weight
            and kwargs["dtype"] is torch.float16
        ):
            self.dtype_cast_counter += 1
        return func(*args, **kwargs)

    def __enter__(self):
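        # Mock out autocast cache clearing while this mode is active, so that a
        # cast cached during the forward pass can survive into the backward pass.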
        self.old_clear_cache = torch.clear_autocast_cache
        torch.clear_autocast_cache = lambda: None
        return super().__enter__()

    def __exit__(self, exc_type, exc_val, exc_tb):
        torch.clear_autocast_cache = self.old_clear_cache
        return super().__exit__(exc_type, exc_val, exc_tb)


@unittest.skipIf(not torch.cuda.is_available(), "requires cuda")
class TestAutocastGPU(TestCase):
    def test_cast_cache_is_global(self):
        """
        Verifies that the autocast cache is global. This is done by
        mocking out cache clearing at the end of the forward pass,
        running forward+backward with an explicit call to autocast in the
        backward, and verifying that the weight is cast to float16 only once.
        """

        data = torch.randn(2, 3).cuda()
        weight = torch.nn.Parameter(torch.randn(4, 3).cuda())

        with WeightDTypeCastCounterMode(weight) as mode:
            with torch.autocast(device_type="cuda"):
                output = CustomLinear.apply(data, weight)
                s = output.sum()
            s.backward()

        self.assertEqual(mode.dtype_cast_counter, 1)

    def test_cache_disabled(self):
        data = torch.randn(2, 3).cuda()
        weight = torch.nn.Parameter(torch.randn(4, 3).cuda())

        try:
            torch._C._set_cached_tensors_enabled(True)
            torch._C._add_cached_tensor(weight)
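            # Registering the weight as a cached tensor is expected to opt it
            # out of the autocast cast cache, so each pass re-casts it.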

            with WeightDTypeCastCounterMode(weight) as mode:
                with torch.autocast(device_type="cuda"):
                    output = CustomLinear.apply(data, weight)
                    s = output.sum()
                s.backward()

            # we should not have cached the conversion of the weight
            self.assertEqual(mode.dtype_cast_counter, 2)

        finally:
            torch._C._set_cached_tensors_enabled(False)

    # index_put under AMP follows a cast policy called "promote",
    # https://github.com/pytorch/pytorch/blob/4fcd15a667df5b80e81db6563d8d3123a0cbd051/aten/src/ATen/autocast_mode.h#L205-L230
    # That means:
    #   (1) double precision is ignored,
    #   (2) if any argument is float, then all arguments are promoted to float,
    #   (3) if all arguments are of lower precision dtype, then all dtypes must be equal to the same amp autocast dtype.
    # Since the AMP autocast dtype is thread-local, it is not preserved across thread boundaries during autograd
    # execution, and because autograd is multi-threaded, the forward pass runs in bfloat16 while the backward pass
    # defaults to float16. The resulting dtype mismatch violates criterion (3) and triggers an error under this policy.
    # For more info see https://github.com/pytorch/pytorch/issues/132715.
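    # As a sketch of the rules above (not exercised by this test): mixing a
    # float32 input with a bfloat16 input would run index_put in float32
    # (rule 2), while mixing bfloat16 with float16 inputs would be rejected
    # because the two low precision dtypes disagree (rule 3).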
    def test_autocast_prioritize(self):
        device = "cuda"
        dtype = torch.bfloat16

        with torch.autocast(device_type=device, enabled=True, dtype=dtype):
            t = torch.randn([3, 4, 5], dtype=dtype, device=device, requires_grad=True)
            index = torch.randint(
                low=0, high=3, size=[3, 4, 5], dtype=torch.int64, device=device
            )
            val = torch.randn(1, dtype=dtype, device=device)

            res = torch.index_put(t, [index], val)

            loss = res.mean()
            loss.backward()


@unittest.skipIf(not torch.backends.mps.is_available(), "requires mps")
class TestAutocastMPS(TestCase):
    def test_cast_cache_is_global(self):
        class CustomLinear(torch.autograd.Function):
            @staticmethod
            def forward(ctx, x, w_t):
                ctx.save_for_backward(x, w_t)
                return torch.nn.functional.linear(x, w_t)

            @staticmethod
            def backward(ctx, grad_output):
                x, w_t = ctx.saved_tensors
                with torch.autocast(device_type="mps"):
                    dL_dX = torch.matmul(grad_output, w_t)
                    dL_dW = torch.matmul(x.transpose(0, 1), grad_output).transpose(0, 1)
                return dL_dX, dL_dW

        data = torch.randn(2, 3).to("mps")
        weight = torch.nn.Parameter(torch.randn(4, 3).to("mps"))
        weight_dtype_cast_counter = 0

        class WeightDTypeCastCounterMode(TorchDispatchMode):
            def __torch_dispatch__(self, func, types, args=(), kwargs=None):
                if (
                    func is torch.ops.aten._to_copy.default
                    and args[0] is weight
                    and kwargs["dtype"] is torch.float16
                ):
                    nonlocal weight_dtype_cast_counter
                    weight_dtype_cast_counter += 1
                return func(*args, **kwargs)

            def __enter__(self):
                # self.old_clear_cache = torch.clear_autocast_cache
                # torch.clear_autocast_cache = lambda: None
                return super().__enter__()

            def __exit__(self, exc_type, exc_val, exc_tb):
                # torch.clear_autocast_cache = self.old_clear_cache
                return super().__exit__(exc_type, exc_val, exc_tb)

        with WeightDTypeCastCounterMode():
            with torch.autocast(device_type="mps"):
                output = CustomLinear.apply(data, weight)
                s = output.sum()
            s.backward()
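        # Cache clearing is not mocked out here (see the commented-out lines in
        # __enter__ above), so the weight is expected to be cast to float16 once
        # in the forward pass and once more in the backward pass.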
        self.assertEqual(weight_dtype_cast_counter, 2)


class TestTorchAutocast(TestCase):
    def test_autocast_fast_dtype(self):
        gpu_fast_dtype = torch.get_autocast_dtype(device_type="cuda")
        cpu_fast_dtype = torch.get_autocast_dtype(device_type="cpu")
        self.assertEqual(gpu_fast_dtype, torch.half)
        self.assertEqual(cpu_fast_dtype, torch.bfloat16)

    def test_invalid_device(self):
        dev = "not a real device"
        msg = f"Invalid device string: '{dev}'"
        with self.assertRaisesRegex(RuntimeError, msg):
            with torch.autocast(device_type=dev):
                _ = torch.tensor(1)
        with self.assertRaisesRegex(RuntimeError, msg):
            assert torch.amp.is_autocast_available(device_type=dev)

    def test_non_string_device(self):
        """Test that `autocast` throws a ValueError when provided a `torch.device` object for `device_type` instead of a string"""
        dev = torch.device("cpu")
        msg = f"Expected `device_type` of type `str`, got: `{type(dev)}`"
        with self.assertRaisesRegex(expected_exception=ValueError, expected_regex=msg):
            torch.autocast(device_type=dev)


if __name__ == "__main__":
    run_tests()