# Owner(s): ["module: nn"]
import pickle
from copy import deepcopy
from itertools import product

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.nn.utils.parametrize as parametrize
from torch import Tensor
from torch.__future__ import get_swap_module_params_on_conversion
from torch.nn import Buffer, Parameter
from torch.testing._internal.common_cuda import TEST_MULTIGPU
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_nn import NNTestCase
from torch.testing._internal.common_utils import (
    gradcheck,
    instantiate_parametrized_tests,
    run_tests,
    set_default_dtype,
    skipIfNoLapack,
    skipIfTorchDynamo,
    swap,
    TemporaryFileName,
)
from torch.testing._internal.two_tensor import TwoTensor


class TestNNParametrization(NNTestCase):
    _do_cuda_memory_leak_check = True
    _do_cuda_non_default_stream = True

    # FIXME: Rewrite this test using functions not depending on LAPACK
    # and remove the `@skipIfNoLapack` (see #70995)
    # torch/nn/utils/parametrize
    @skipIfNoLapack
    @swap([True, False])
    def test_register_and_remove_parametrization(self):
        r"""Test that it is possible to add a few parametrizations
        on a parameter or a buffer and that removing them restores the initial state.
        It also tests that backpropagating through them works as expected.
        """
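
        # Background sketch (editorial note, not part of the original test):
        # after parametrize.register_parametrization(module, "weight", p),
        # accessing module.weight applies p to the unconstrained tensor stored
        # at module.parametrizations.weight.original, so the optimizer updates
        # `original` while module.weight always satisfies the constraint.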

        # Define a couple matrix parametrizations
        class Skew(nn.Module):
            def forward(self, X):
                X = X.tril(-1)
                return X - X.T

        class Orthogonal(nn.Module):
            def forward(self, X):
                # Cayley map
                # If X is skew-symmetric it returns an orthogonal matrix
                Id = torch.eye(X.size(0), device=X.device)
                # We call contiguous because solve returns a tensor with strides that are Fortran-contiguous
                # and autograd raises a performance warning.
                # This happens when we remove the parametrization with leave_parametrized=True,
                # which does a set_ with a non-contiguous tensor while the gradient is contiguous
                return torch.linalg.solve(Id + X, Id - X).contiguous()
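
        # Why the Cayley map is orthogonal (editorial sketch): for
        # skew-symmetric X we have (Id - X).T == Id + X, and all factors are
        # polynomials in X so they commute; hence Q = (Id + X)^{-1} (Id - X)
        # satisfies Q.T @ Q == Id. A cheap deterministic spot check
        # (editorial addition; X_demo and Q_demo are illustrative names):
        X_demo = torch.tensor([[0.0, 1.0], [-1.0, 0.0]])
        Q_demo = Orthogonal()(X_demo)
        self.assertEqual(Q_demo.T @ Q_demo, torch.eye(2))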

        class Resize(nn.Module):
            def forward(self, X):
                return X[[0]]

        class NoResize(nn.Module):
            def forward(self, X):
                return X

        # Define a couple vector parametrizations
        class FirstZero(nn.Module):
            def forward(self, x):
                return torch.cat([x.new_zeros(1), x[1:]])

        class LastZero(nn.Module):
            def forward(self, x):
                return torch.cat([x[:-1], x.new_zeros(1)])

        model = nn.Linear(8, 8)
        initial_weight_id = id(model.weight)
        initial_bias_id = id(model.bias)
        initial_model = deepcopy(model)

        # Test unsafe flag
        with self.assertRaisesRegex(
            ValueError,
            "Registering a parametrization may not change the shape of the tensor",
        ):
            parametrize.register_parametrization(
                model, "weight", Resize()
            )  # default unsafe = False
            model(torch.ones(8, 8))

        # One parametrization with unsafe=True
        parametrize.register_parametrization(model, "weight", Resize(), unsafe=True)
        self.assertTrue(hasattr(model, "parametrizations"))
        self.assertTrue(parametrize.is_parametrized(model))
        self.assertTrue(parametrize.is_parametrized(model, "weight"))
        self.assertFalse(parametrize.is_parametrized(model, "bias"))
        self.assertNotIn("weight", model._parameters)
        self.assertTrue(model.weight.shape[0] == 1)
        parametrize.remove_parametrizations(model, "weight", leave_parametrized=False)
        self.assertFalse(hasattr(model, "parametrizations"))
        self.assertEqual(model.weight, initial_model.weight)
        self.assertEqual(id(model.weight), initial_weight_id)
        self.assertEqual(model.__class__, nn.Linear)

        # Two parametrizations with unsafe=True
        parametrize.register_parametrization(model, "weight", Resize(), unsafe=True)
        parametrize.register_parametrization(model, "weight", NoResize(), unsafe=False)
        self.assertTrue(hasattr(model, "parametrizations"))
        self.assertTrue(parametrize.is_parametrized(model))
        self.assertTrue(parametrize.is_parametrized(model, "weight"))
        self.assertFalse(parametrize.is_parametrized(model, "bias"))
        self.assertNotIn("weight", model._parameters)
        self.assertTrue(model.weight.shape[0] == 1)
        parametrize.remove_parametrizations(model, "weight", leave_parametrized=False)
        self.assertFalse(hasattr(model, "parametrizations"))
        self.assertEqual(model.weight, initial_model.weight)
        self.assertEqual(id(model.weight), initial_weight_id)
        self.assertEqual(model.__class__, nn.Linear)
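
        # Editorial note: unsafe=True skips the eager check that forward()
        # preserves the shape and dtype of the original tensor, which is why
        # the shape-changing Resize registers without error above; problems
        # would only surface when the parametrized attribute is actually used.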

        # Test unsafe flag doesn't change expected behavior
        parametrize.register_parametrization(model, "weight", Skew(), unsafe=True)
        self.assertTrue(hasattr(model, "parametrizations"))
        self.assertTrue(parametrize.is_parametrized(model))
        self.assertTrue(parametrize.is_parametrized(model, "weight"))
        self.assertFalse(parametrize.is_parametrized(model, "bias"))
        self.assertNotIn("weight", model._parameters)
        # Result should be skew-symmetric
        A = model.weight
        self.assertEqual(A, -A.T)
        if get_swap_module_params_on_conversion():
            # When using the swap_tensors path, this is needed so that the autograd
            # graph is not alive anymore.
            del A
        # Remove and check consistency
        parametrize.remove_parametrizations(model, "weight", leave_parametrized=False)
        self.assertFalse(hasattr(model, "parametrizations"))
        self.assertEqual(model.weight, initial_model.weight)
        self.assertEqual(id(model.weight), initial_weight_id)
        self.assertEqual(model.__class__, nn.Linear)

        # Test one parametrization
        parametrize.register_parametrization(model, "weight", Skew())
        self.assertTrue(hasattr(model, "parametrizations"))
        self.assertTrue(parametrize.is_parametrized(model))
        self.assertTrue(parametrize.is_parametrized(model, "weight"))
        self.assertFalse(parametrize.is_parametrized(model, "bias"))
        self.assertNotIn("weight", model._parameters)
        # Result should be skew-symmetric
        A = model.weight
        self.assertEqual(A, -A.T)
        if get_swap_module_params_on_conversion():
            # When using the swap_tensors path, this is needed so that the autograd
            # graph is not alive anymore.
            del A
        # Remove and check consistency
        parametrize.remove_parametrizations(model, "weight", leave_parametrized=False)
        self.assertFalse(hasattr(model, "parametrizations"))
        self.assertEqual(model.weight, initial_model.weight)
        self.assertEqual(id(model.weight), initial_weight_id)
        self.assertEqual(model.__class__, nn.Linear)

        # Test two parametrizations at the same time and removing them
        parametrize.register_parametrization(model, "weight", Skew())
        parametrize.register_parametrization(model, "weight", Orthogonal())
        # Result should be orthogonal
        X = model.weight
        Id = torch.eye(X.size(0), device=X.device)
        self.assertEqual(X.T @ X, Id)
        if get_swap_module_params_on_conversion():
            # When using the swap_tensors path, this is needed so that the autograd
            # graph is not alive anymore.
            del X
        # Structure tests
        self.assertTrue(hasattr(model, "parametrizations"))
        self.assertTrue(parametrize.is_parametrized(model))
        self.assertTrue(parametrize.is_parametrized(model, "weight"))
        self.assertFalse(parametrize.is_parametrized(model, "bias"))
        self.assertIn("weight", model.parametrizations)
        self.assertNotIn("weight", model._parameters)
        # Remove
        parametrize.remove_parametrizations(model, "weight", leave_parametrized=False)
        self.assertEqual(model.weight, initial_model.weight)
        self.assertEqual(id(model.weight), initial_weight_id)
        self.assertFalse(hasattr(model, "parametrizations"))
        self.assertEqual(model.__class__, nn.Linear)

        # Add everything
        parametrize.register_parametrization(model, "weight", Skew())
        parametrize.register_parametrization(model, "weight", Orthogonal())
        parametrize.register_parametrization(model, "bias", FirstZero())
        parametrize.register_parametrization(model, "bias", LastZero())

        # Basic tests
        self.assertTrue(parametrize.is_parametrized(model))
        self.assertTrue(parametrize.is_parametrized(model, "weight"))
        self.assertTrue(parametrize.is_parametrized(model, "bias"))
        self.assertEqual(model.bias[0].item(), 0.0)
        self.assertEqual(model.bias[-1].item(), 0.0)
        self.assertEqual(
            len(list(model.parameters())), 2
        )  # Nothing weird has happened
        # Should not throw

        sgd = torch.optim.SGD(model.parameters(), lr=0.01)

        weight_copy = model.weight.clone()
        bias_copy = model.bias.clone()
        sgd.zero_grad()
        (model.weight.T @ model.bias).sum().backward()
        sgd.step()
        self.assertNotEqual(model.weight, weight_copy)
        self.assertNotEqual(model.bias, bias_copy)
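
        # Editorial note: model.parameters() yields the unconstrained
        # `original` tensors stored under model.parametrizations, so sgd.step()
        # updates those; the constrained model.weight and model.bias are
        # recomputed from them on the next attribute access.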

        # Remove first parametrization.
        # Check that the model is still parametrized and so is the second parameter
        parametrize.remove_parametrizations(model, "weight", leave_parametrized=False)
        self.assertTrue(parametrize.is_parametrized(model))  # Still parametrized
        self.assertFalse(
            parametrize.is_parametrized(model, "weight")
        )  # Parametrization removed
        self.assertTrue(
            parametrize.is_parametrized(model, "bias")
        )  # Still parametrized
        self.assertEqual(model.bias[0].item(), 0.0)  # Still parametrized
        self.assertEqual(model.bias[-1].item(), 0.0)  # Still parametrized
        self.assertNotEqual(model.weight, initial_model.weight)  # Has been updated
        self.assertEqual(id(model.weight), initial_weight_id)  # Keeps the same id
        self.assertEqual(len(list(model.parameters())), 2)  # Nothing weird has happened
        # Should not throw
        weight_copy = model.weight.clone()
        bias_copy = model.bias.clone()
        sgd.zero_grad()
        (model.weight.T @ model.bias).sum().backward()
        sgd.step()
        self.assertNotEqual(model.weight, weight_copy)
        self.assertNotEqual(model.bias, bias_copy)

        # Remove the second parametrization.
        # Check that the module is not parametrized
        parametrize.remove_parametrizations(model, "bias", leave_parametrized=False)
        self.assertFalse(parametrize.is_parametrized(model))  # Not parametrized
        self.assertNotEqual(model.bias, initial_model.bias)  # Has been updated
        self.assertNotEqual(model.bias[0].item(), 0.0)  # Not parametrized
        self.assertNotEqual(model.bias[-1].item(), 0.0)  # Not parametrized
        self.assertEqual(id(model.bias), initial_bias_id)  # Keeps the same id
        self.assertFalse(
            hasattr(model, "parametrizations")
        )  # The module is not parametrized
        self.assertEqual(model.__class__, nn.Linear)  # Restores the previous class
        self.assertEqual(len(list(model.parameters())), 2)  # Nothing weird has happened

        # Should not throw. Things are updated
        weight_copy = model.weight.clone()
        bias_copy = model.bias.clone()
        sgd.zero_grad()
        (model.weight.T @ model.bias).sum().backward()
        sgd.step()
        self.assertNotEqual(model.weight, weight_copy)
        self.assertNotEqual(model.bias, bias_copy)
        if get_swap_module_params_on_conversion():
            # When using the swap_tensors path, this is needed so that the autograd
            # graph is not alive anymore.
            del weight_copy, bias_copy

        # Test leave_parametrized=True
        for _ in range(2):
            parametrize.register_parametrization(model, "weight", Skew())
            parametrize.register_parametrization(model, "weight", Orthogonal())
            parametrize.remove_parametrizations(
                model, "weight", leave_parametrized=True
            )
            # We didn't change the dtype nor had multiple inputs, so the id should be the same
            self.assertEqual(id(model.weight), initial_weight_id)
            self.assertEqual(id(model.bias), initial_bias_id)
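
        # Editorial note: in the loop above, leave_parametrized=True evaluates
        # the parametrization one last time and writes the constrained value
        # back into the original tensor (via set_ when possible), which is why
        # the parameter ids are preserved.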

        # Should not throw. Things are updated
        weight_copy = model.weight.clone()
        bias_copy = model.bias.clone()
        sgd.zero_grad()
        (model.weight.T @ model.bias).sum().backward()
        sgd.step()
        self.assertNotEqual(model.weight, weight_copy)
        self.assertNotEqual(model.bias, bias_copy)
        if get_swap_module_params_on_conversion():
            # When using the swap_tensors path, this is needed so that the autograd
            # graph is not alive anymore.
            del weight_copy, bias_copy

    @swap([True, False])
    def test_register_and_remove_nested_parametrization(self):
        r"""Test that it is possible to nest parametrizations,
        meaning that the original param is parametrized again.
        """

        class Skew(nn.Module):
            def forward(self, X):
                X = X.tril(-1)
                return X - X.T

        model = nn.Linear(8, 8)
        # Add top level parametrization
        parametrize.register_parametrization(model, "weight", Skew())
        self.assertTrue(hasattr(model, "parametrizations"))
        self.assertTrue(parametrize.is_parametrized(model))
        self.assertTrue(parametrize.is_parametrized(model, "weight"))
        self.assertFalse(parametrize.is_parametrized(model, "bias"))
        self.assertNotIn("weight", model._parameters)
        # Result should be skew-symmetric
        A = model.weight
        self.assertEqual(A, -A.T)
        if get_swap_module_params_on_conversion():
            # When using the swap_tensors path, this is needed so that the autograd
            # graph is not alive anymore.
            del A
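
        # Editorial note: the parametrization machinery is itself built from
        # modules; model.parametrizations.weight is a ParametrizationList that
        # holds the unconstrained tensor as its "original" parameter, so that
        # parameter can be parametrized again, as done below.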

        # Add nested parametrization
        param_mod = model.parametrizations.weight
        self.assertFalse(hasattr(param_mod, "parametrizations"))
        self.assertFalse(parametrize.is_parametrized(param_mod))
        self.assertFalse(parametrize.is_parametrized(param_mod, "original"))

        parametrize.register_parametrization(param_mod, "original", Skew())
        self.assertTrue(hasattr(param_mod, "parametrizations"))
        self.assertTrue(parametrize.is_parametrized(param_mod))
        self.assertTrue(parametrize.is_parametrized(param_mod, "original"))
        self.assertNotIn("original", param_mod._parameters)
        # Result should be skew-symmetric
        A = param_mod.original
        self.assertEqual(A, -A.T)

        # Remove nested param and check consistency
        parametrize.remove_parametrizations(
            param_mod, "original", leave_parametrized=False
        )
        self.assertFalse(hasattr(param_mod, "parametrizations"))
        self.assertEqual(param_mod.__class__, parametrize.ParametrizationList)

        # Remove top level and check consistency
        parametrize.remove_parametrizations(model, "weight", leave_parametrized=False)
        self.assertFalse(hasattr(model, "parametrizations"))
        self.assertEqual(model.__class__, nn.Linear)

    @swap([True, False])
    def test_register_and_remove_buffer_parametrization(self):
        r"""Test that it is possible to add and remove parametrizations on buffers"""

        # Define a couple vector parametrizations
        class FirstZero(nn.Module):
            def forward(self, x):
                return torch.cat([x.new_zeros(1), x[1:]])

        class LastZero(nn.Module):
            def forward(self, x):
                return torch.cat([x[:-1], x.new_zeros(1)])

        model = nn.Linear(8, 8)
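
        # Editorial note: replacing the bias Parameter with a Buffer below
        # means the parametrized tensor is registered as a buffer, so only the
        # weight should remain in model.parameters().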

        # Instantiate parametrizations on buffers. It should work as expected
        delattr(model, "bias")
        model.bias = Buffer(torch.ones(8))
        parametrize.register_parametrization(model, "bias", FirstZero())
        parametrize.register_parametrization(model, "bias", LastZero())
        self.assertTrue(parametrize.is_parametrized(model))
        self.assertTrue(parametrize.is_parametrized(model, "bias"))
        self.assertEqual(model.bias[0].item(), 0.0)
        self.assertEqual(model.bias[-1].item(), 0.0)
        self.assertTrue((model.bias[1:-1] == torch.ones(6)).all())
        self.assertEqual(len(list(model.parameters())), 1)

        # Remove parametrizations on buffers. It should work as expected
        parametrize.remove_parametrizations(model, "bias", leave_parametrized=True)
        self.assertFalse(parametrize.is_parametrized(model))
        self.assertFalse(parametrize.is_parametrized(model, "bias"))
        self.assertEqual(model.bias[0].item(), 0.0)
        self.assertEqual(model.bias[-1].item(), 0.0)
        self.assertTrue((model.bias[1:-1] == torch.ones(6)).all())
        self.assertEqual(len(list(model.parameters())), 1)

    # FIXME: Rewrite this test using functions not depending on LAPACK
    # and remove the `@skipIfNoLapack` (see #70995)
    @skipIfNoLapack
    @swap([True, False])
    def test_serialization_parametrization(self):
        r"""Test that it is possible to serialize a parametrized model via state_dict"""

        # A stateful parametrization
        class Orthogonal(nn.Module):
            def __init__(self, n):
                super().__init__()
                self.id = Buffer(torch.eye(n))
                self.B = Buffer(torch.empty(n, n))
                init.orthogonal_(self.B)

            def forward(self, X):
                A = X.triu(1)
                A = A - A.T
                return self.B @ torch.linalg.solve(self.id + A, self.id - A)
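
        # Editorial sketch: because Orthogonal registers `id` and `B` as
        # buffers, the state_dict of the parametrized model is expected to
        # contain entries such as "0.parametrizations.weight.original" and
        # "0.parametrizations.weight.0.B" (key names assumed from the usual
        # nn.Module nesting), which is what makes the round trip below work.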

        def get_model():
            model = torch.nn.Sequential(
                torch.nn.Linear(5, 5),
                torch.nn.ReLU(),
                torch.nn.Linear(5, 1),
            )

            parametrize.register_parametrization(model[0], "weight", Orthogonal(5))
            return model

        model = get_model()

        prev_weight = model[0].weight
        prev_B = model[0].parametrizations.weight[0].B

        new_model = get_model()
        with TemporaryFileName() as fname:
            torch.save(model.state_dict(), fname)
            new_model.load_state_dict(torch.load(fname))

        # Integrity tests
        self.assertTrue(parametrize.is_parametrized(new_model[0], "weight"))
        self.assertEqual(prev_weight, new_model[0].weight)
        self.assertEqual(prev_B, new_model[0].parametrizations.weight[0].B)

        # Trying to save the whole parametrized model raises
        with self.assertRaisesRegex(RuntimeError, "state_dict"):
            with TemporaryFileName() as fname:
                torch.save(model, fname)

    # FIXME: Rewrite this test using functions not depending on LAPACK
    # and remove the `@skipIfNoLapack` (see #70995)
    @skipIfNoLapack
    @swap([True, False])
    def test_initialization_parametrization(self):
        r"""Test that it is possible to initialize a parametrization when it
        implements a `right_inverse` method
        """

        class Skew(nn.Module):
            def forward(self, X):
                A = X.triu(1)
                return A - A.T

            def is_skew(self, A):
                return torch.allclose(A, -A.T, atol=1e-6)

            def right_inverse(self, X):
                if not self.is_skew(X):
                    raise ValueError("The matrix is not skew-symmetric.")
                return X.triu(1)
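
        # Editorial sketch: with a parametrization registered, the assignment
        # `module.weight = X` is routed through right_inverse(X), and the
        # result is stored as the unconstrained `original`, so that afterwards
        # forward(original) reproduces X.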

        # Implements a Cayley map where right_inverse is not quite the inverse of forward
        class Orthogonal(nn.Module):
            def __init__(self, n):
                super().__init__()
                self.B = Buffer(torch.eye(n))

            def forward(self, X):
                Id = torch.eye(X.size(0))
                return self.B @ torch.linalg.solve(Id + X, Id - X)

            def is_orthogonal(self, X):
                Id = torch.eye(X.size(0))
                return torch.allclose(X.T @ X, Id, atol=1e-4)

            def right_inverse(self, X):
                if not self.is_orthogonal(X):
                    raise ValueError("The input is not orthogonal.")
                # cayley(0) == Id, so B @ cayley(0) == B
                self.B = X
                return torch.zeros_like(X)

        N = 5
        model = nn.Linear(N, N)
        # Register the skew-symmetric constraint. The result is now skew-symmetric
        skew = Skew()
        # Make the weight skew-symmetric before registering the parametrization
        with torch.no_grad():
            model.weight.set_(skew(model.weight))
        parametrize.register_parametrization(model, "weight", skew)
        X = torch.rand(N, N)
        # X is not skew-symmetric, so it throws an error
        with self.assertRaises(ValueError):
            model.weight = X
        # Make X skew-symmetric
        X = X - X.T
        model.weight = X
        self.assertEqual(model.parametrizations.weight.original, X.triu(1))
        self.assertEqual(model.weight, X)

        # Having several parametrizations registered should work in the same way
        # Register now the Cayley map. The result is now orthogonal
        parametrize.register_parametrization(model, "weight", Orthogonal(N))
        X = torch.rand(N, N)
        # X is not orthogonal, so it throws an error
        with self.assertRaises(ValueError):
            model.weight = X
        init.orthogonal_(X)
        model.weight = X
        self.assertEqual(model.weight, X)
        self.assertEqual(model.parametrizations.weight.original, torch.zeros_like(X))

    @swap([True, False])
    def test_errors_unparametrized_tensor_parametrization(self):
        # Test errors when registering a parametrization on an unparametrized tensor
        module = nn.Linear(3, 4)
        weight_init = module.weight.clone()

        class Identity(nn.Module):
            def forward(self, x):
                return x

        # Registering a parametrization on a non-existing parameter throws
        with self.assertRaisesRegex(ValueError, "does not have a parameter"):
            parametrize.register_parametrization(module, "foo", Identity())
        self.assertFalse(parametrize.is_parametrized(module))

        # Removing parametrizations from an unparametrized tensor throws
        with self.assertRaisesRegex(ValueError, "does not have a parametrization"):
            parametrize.remove_parametrizations(module, "bias")
        self.assertFalse(parametrize.is_parametrized(module))

        # A correct parametrization with several outputs
        class Sum(nn.Module):
            def forward(self, x, y):
                return x + y

            def right_inverse(self, z):
                return z, torch.zeros_like(z)
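
        # Editorial sketch: for a multi-output right_inverse such as Sum's,
        # the returned tensors are stored as original0, original1, ..., and
        # module.weight is recomputed as forward(original0, original1).
        # leave_parametrized=False is rejected below because there is no
        # single original tensor to restore.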
        parametrize.register_parametrization(module, "weight", Sum())
        # Cannot remove a parametrization with several outputs with `leave_parametrized=False`
        with self.assertRaisesRegex(ValueError, "leave_parametrized=False"):
            parametrize.remove_parametrizations(
                module, "weight", leave_parametrized=False
            )
        parametrize.remove_parametrizations(module, "weight", leave_parametrized=True)

        # A parametrization with an incorrect number of outputs
        class WrongNumberParams(nn.Module):
            def forward(self, x, y, z):
                return x + y + z

            def right_inverse(self, w):
                return w, torch.zeros_like(w)

        # Makes param(*param.right_inverse(X)) fail
        with self.assertRaisesRegex(TypeError, "positional argument"):
            parametrize.register_parametrization(module, "weight", WrongNumberParams())
        self.assertFalse(parametrize.is_parametrized(module))

        # A parametrization with a right_inverse that does not return a Tensor or Sequence[Tensor]
        class WrongRightInverse(Identity):
            def right_inverse(self, z):
                return None

        # right_inverse should return a Tensor or a Sequence[Tensor]
        with self.assertRaisesRegex(ValueError, "Tensor or a Sequence of"):
            parametrize.register_parametrization(module, "weight", WrongRightInverse())
        self.assertFalse(parametrize.is_parametrized(module))

        # If it's a sequence, it must be a sequence of tensors
        class WrongRightInverseSequence(nn.Module):
            def forward(self, x, y):
                return x

            def right_inverse(self, z):
                return None, z

        with self.assertRaisesRegex(ValueError, "of the sequence with type"):
            parametrize.register_parametrization(
                module, "weight", WrongRightInverseSequence()
            )
        self.assertFalse(parametrize.is_parametrized(module))

        # A parametrization from one tensor to one tensor that changes the dtype
        class ChangeDtypeInverse(nn.Module):
            def forward(self, x):
                return x.float()

            def right_inverse(self, w):
                return w.bool()
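
        # Editorial sketch: when right_inverse returns a single tensor, that
        # tensor is stored in place of the original parameter, so it must keep
        # the parameter's dtype; hence the registration below must fail.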

        # For parametrizations that return one tensor, right_inverse may not change the dtype
        with self.assertRaisesRegex(
            ValueError, "outputs one tensor, it may not change the dtype"
        ):
            parametrize.register_parametrization(module, "weight", ChangeDtypeInverse())
        self.assertFalse(parametrize.is_parametrized(module))

        # Doesn't return a tensor
        class NotTensor(nn.Module):
            def forward(self, x):
                return 2

        # Forward must return a tensor
        with self.assertRaisesRegex(ValueError, "must return a tensor"):
            parametrize.register_parametrization(module, "weight", NotTensor())
        self.assertFalse(parametrize.is_parametrized(module))

        # A parametrization from one tensor to one tensor that changes the dtype
        class ChangeDtype(nn.Module):
            def forward(self, x):
                return x.bool()

        # forward should not change the initial dtype
        with self.assertRaisesRegex(ValueError, "may not change the dtype"):
            parametrize.register_parametrization(module, "weight", ChangeDtype())
        self.assertFalse(parametrize.is_parametrized(module))

        # Change shape
        class ChangeShape(nn.Module):
            def forward(self, x):
                return x[:-1]

        # forward should not change the original shape
        with self.assertRaisesRegex(ValueError, "may not change the shape"):
            parametrize.register_parametrization(module, "weight", ChangeShape())
        self.assertFalse(parametrize.is_parametrized(module))

        # Many to one that changes dtype
        class ChangeDtypeMulti(nn.Module):
            def forward(self, x, y):
                return (x + y).bool()

            def right_inverse(self, w):
                return w, w + 1

        # forward should not change the original dtype, even for parametrizations with many inputs
        with self.assertRaisesRegex(ValueError, "may not change the dtype"):
            parametrize.register_parametrization(module, "weight", ChangeDtypeMulti())
        self.assertFalse(parametrize.is_parametrized(module))

        # Returning a sequence of length one, although weird, is correct
        class SequenceLen1(nn.Module):
            def forward(self, x):
                return x

            def right_inverse(self, w):
                return (w,)

        parametrize.register_parametrization(module, "weight", SequenceLen1())
        self.assertTrue(hasattr(module.parametrizations.weight, "original0"))
        self.assertFalse(hasattr(module.parametrizations.weight, "original1"))
        _ = module.weight  # Does not throw
        self.assertTrue(parametrize.is_parametrized(module))
        parametrize.remove_parametrizations(module, "weight", leave_parametrized=True)

        # None of the operations above should have altered the weight
        self.assertFalse(parametrize.is_parametrized(module))
        self.assertEqual(module.weight, weight_init)

    @swap([True, False])
    def test_errors_parametrized_tensor_parametrization(self):
        # Test errors when registering a parametrization on a parametrized tensor

        class Identity(nn.Module):
            def forward(self, x):
                return x

        module = nn.Linear(3, 4)
        parametrize.register_parametrization(module, "weight", Identity())
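
        # Editorial note: registering on an already-parametrized tensor
        # validates the new module against the currently visible module.weight
        # and appends it to the existing ParametrizationList; on failure the
        # list should be left untouched, which the length-1 checks below verify.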

        # Has to return a tensor
        class WrongReturn(nn.Module):
            def forward(self, x):
                return x, x

        with self.assertRaisesRegex(ValueError, "must return a tensor"):
            parametrize.register_parametrization(module, "weight", WrongReturn())
        self.assertTrue(parametrize.is_parametrized(module))
        self.assertEqual(len(module.parametrizations.weight), 1)
        self.assertTrue(isinstance(module.parametrizations.weight[0], Identity))

        # Cannot change dtype
        class ChangeDtype(nn.Module):
            def forward(self, x):
                return x.bool()

        with self.assertRaisesRegex(ValueError, "may not change the dtype"):
            parametrize.register_parametrization(module, "weight", ChangeDtype())
        self.assertTrue(parametrize.is_parametrized(module))
        self.assertEqual(len(module.parametrizations.weight), 1)
        self.assertTrue(isinstance(module.parametrizations.weight[0], Identity))

        # Cannot change shape
        class ChangeShape(nn.Module):
            def forward(self, x):
                return x[:-1]

        with self.assertRaisesRegex(ValueError, "may not change the shape"):
            parametrize.register_parametrization(module, "weight", ChangeShape())
        self.assertTrue(parametrize.is_parametrized(module))
        self.assertEqual(len(module.parametrizations.weight), 1)
        self.assertTrue(isinstance(module.parametrizations.weight[0], Identity))

        # The following checks are mostly due to bugs in the code of the parametrization

        # right_inverse has to return a tensor
        class WrongReturnInverse(Identity):
            def right_inverse(self, x):
                return x, x

        with self.assertRaisesRegex(ValueError, "right_inverse must return a tensor"):
            parametrize.register_parametrization(module, "weight", WrongReturnInverse())
        self.assertTrue(parametrize.is_parametrized(module))
        self.assertEqual(len(module.parametrizations.weight), 1)
        self.assertTrue(isinstance(module.parametrizations.weight[0], Identity))

        # Cannot change dtype
        class ChangeDtypeInverse(Identity):
            def right_inverse(self, x):
                return x.bool()

        # The following checks are mostly due to bugs in the code of the parametrization

        # right_inverse has to return a tensor
        class WrongReturnInverse(Identity):
            def right_inverse(self, x):
                return x, x

        with self.assertRaisesRegex(ValueError, "right_inverse must return a tensor"):
            parametrize.register_parametrization(module, "weight", WrongReturnInverse())
        self.assertTrue(parametrize.is_parametrized(module))
        self.assertEqual(len(module.parametrizations.weight), 1)
        self.assertTrue(isinstance(module.parametrizations.weight[0], Identity))

        # Cannot change dtype
        class ChangeDtypeInverse(Identity):
            def right_inverse(self, x):
                return x.bool()

        with self.assertRaisesRegex(ValueError, "must have the same dtype"):
            parametrize.register_parametrization(module, "weight", ChangeDtypeInverse())
        self.assertTrue(parametrize.is_parametrized(module))
        self.assertEqual(len(module.parametrizations.weight), 1)
        self.assertTrue(isinstance(module.parametrizations.weight[0], Identity))

        # Cannot change shape
        class ChangeShapeInverse(Identity):
            def right_inverse(self, x):
                return x[:-1]

        with self.assertRaisesRegex(ValueError, "must have the same shape"):
            parametrize.register_parametrization(module, "weight", ChangeShapeInverse())
        self.assertTrue(parametrize.is_parametrized(module))
        self.assertEqual(len(module.parametrizations.weight), 1)
        self.assertTrue(isinstance(module.parametrizations.weight[0], Identity))

    # FIXME: Rewrite this test using functions not depending on LAPACK
    # and remove the `@skipIfNoLapack` (see #70995)
    @skipIfNoLapack
    @swap([True, False])
    def test_multiple_inputs_parametrization(self):
        # A parametrization with several outputs
        class RankOne(nn.Module):
            def forward(self, x, y):
                # Form a rank-1 matrix from a pair of vectors
                return x.unsqueeze(-1) @ y.unsqueeze(-2)

            def right_inverse(self, Y):
                # We project the given matrix onto the rank-1 matrices
                U, S, Vh = torch.linalg.svd(Y, full_matrices=False)
                # S is sorted in descending order.
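                # Keeping only the leading singular triple gives the closest
                # rank-1 matrix to Y in the Frobenius norm (Eckart-Young);
                # sqrt(S[0]) is split evenly between the two returned factors.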
                s0_sqrt = S[0].sqrt().unsqueeze(-1)
                return U[..., :, 0] * s0_sqrt, Vh[..., 0, :] * s0_sqrt

        # Simple parametrization
        class Double(nn.Module):
            def forward(self, x):
                return 2.0 * x

            def right_inverse(self, w):
                return 0.5 * w

        model = nn.Linear(3, 3)
        # Test one parametrization
        parametrize.register_parametrization(model, "weight", RankOne())
        self.assertTrue(hasattr(model, "parametrizations"))
        self.assertTrue(parametrize.is_parametrized(model))
        self.assertTrue(parametrize.is_parametrized(model, "weight"))
        self.assertTrue(hasattr(model.parametrizations.weight, "original0"))
        self.assertIn("original0", model.parametrizations.weight._parameters)
        self.assertTrue(hasattr(model.parametrizations.weight, "original1"))
        self.assertIn("original1", model.parametrizations.weight._parameters)
        self.assertFalse(parametrize.is_parametrized(model, "bias"))
        self.assertNotIn("weight", model._parameters)
        # Result should be rank 1
        self.assertEqual(torch.linalg.matrix_rank(model.weight).item(), 1)
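
        # With several originals there is no single tensor to restore, so the
        # parametrization can only be removed by materializing its current
        # value (leave_parametrized=True).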
        with self.assertRaisesRegex(ValueError, "leave_parametrized=False"):
            # Cannot remove a parametrization with multiple inputs and not leave it parametrized
            parametrize.remove_parametrizations(
                model, "weight", leave_parametrized=False
            )
        # Remove parametrization and check consistency
        parametrize.remove_parametrizations(model, "weight", leave_parametrized=True)
        self.assertFalse(hasattr(model, "parametrizations"))
        self.assertEqual(model.__class__, nn.Linear)
        self.assertFalse(parametrize.is_parametrized(model))
        self.assertEqual(torch.linalg.matrix_rank(model.weight).item(), 1)
        self.assertIn("weight", model._parameters)

        # Registering parametrizations with one input on top of one with multiple inputs should work
        init_weight = model.weight.clone()
        parametrize.register_parametrization(model, "weight", RankOne())
        # Projecting a rank-1 matrix onto the rank-1 matrices does not change the matrix
        self.assertEqual(init_weight, model.weight)
        parametrize.register_parametrization(model, "weight", Double())
        # The matrix is now twice the initial matrix
        self.assertEqual(2.0 * init_weight, model.weight)
        # Multiplying by a scalar does not change the rank
        self.assertEqual(torch.linalg.matrix_rank(model.weight).item(), 1)

        # The model now has three parameters
        self.assertEqual(len(list(model.parameters())), 3)
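        # (original0 and original1 coming from RankOne, plus the bias)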

        sgd = torch.optim.SGD(model.parameters(), lr=0.1)

        # Test backward. Should not throw
        for _ in range(2):
            sgd.zero_grad()
            loss = (model.weight.T @ model.bias).sum()
            loss.backward()
            sgd.step()

        # Same drill as before, removing should work as expected
        with self.assertRaisesRegex(ValueError, "leave_parametrized=False"):
            # Cannot remove a parametrization with multiple inputs and not leave it parametrized
            parametrize.remove_parametrizations(
                model, "weight", leave_parametrized=False
            )
        # Remove parametrization and check consistency
        parametrize.remove_parametrizations(model, "weight", leave_parametrized=True)
        self.assertFalse(hasattr(model, "parametrizations"))
        self.assertEqual(model.__class__, nn.Linear)
        self.assertFalse(parametrize.is_parametrized(model))
        self.assertEqual(torch.linalg.matrix_rank(model.weight).item(), 1)
        self.assertIn("weight", model._parameters)

        # The model now has two parameters
        self.assertEqual(len(list(model.parameters())), 2)

        # Test backward. Should not throw
        sgd = torch.optim.SGD(model.parameters(), lr=0.1)
        for _ in range(2):
            sgd.zero_grad()
            loss = (model.weight.T @ model.bias).sum()
            loss.backward()
            sgd.step()

    # FIXME: Rewrite this test using functions not depending on LAPACK
    # and remove the `@skipIfNoLapack` (see #70995)
    @skipIfNoLapack
    @swap([True, False])
    def test_caching_parametrization(self):
        r"""Test the caching system of a parametrization"""

        # Define a couple matrix parametrizations
        class Skew(nn.Module):
            def forward(self, X):
                X = X.tril(-1)
                return X - X.T

        class Orthogonal(nn.Module):
            def forward(self, X):
                Id = torch.eye(X.size(0), device=X.device)
                return torch.linalg.solve(Id + X, Id - X)

        model = nn.Linear(5, 5)
        parametrize.register_parametrization(model, "weight", Skew())
        parametrize.register_parametrization(model, "weight", Orthogonal())

        # Test that the caching system works
        with parametrize.cached():
            X = model.weight
            Y = model.weight
            self.assertEqual(id(X), id(Y))

    # FIXME: Rewrite this test using functions not depending on LAPACK
    # and remove the `@skipIfNoLapack` (see #70995)
    @skipIfNoLapack
    @swap([True, False])
    def test_caching_parametrization_with_transfer_parametrizations_and_params(self):
        r"""Test that transferring parametrizations doesn't cause issues with caching"""

        class Skew(nn.Module):
            def forward(self, X):
                X = X.tril(-1)
                return X - X.T
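
        # The forward below is the Cayley transform X -> (I + X)^{-1}(I - X),
        # which maps a skew-symmetric X to an orthogonal matrix, e.g.:
        #   Q = Orthogonal()(Skew()(A))
        #   torch.testing.assert_close(Q @ Q.T, torch.eye(A.size(0)))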
        class Orthogonal(nn.Module):
            def forward(self, X):
                Id = torch.eye(X.size(0), device=X.device)
                return torch.linalg.solve(Id + X, Id - X)

        model = nn.Linear(5, 5)
        parametrize.register_parametrization(model, "weight", Skew())
        parametrize.register_parametrization(model, "weight", Orthogonal())

        to_model = nn.Linear(5, 5)
        parametrize.transfer_parametrizations_and_params(model, to_model)

        with parametrize.cached():
            X = model.weight
            Y = model.weight
            self.assertEqual(id(X), id(Y))

            A = to_model.weight
            B = to_model.weight
            self.assertEqual(id(A), id(B))

            # test that the results are distinct objects for each module
            self.assertNotEqual(id(A), id(X))

    @swap([True, False])
    def test_parametrization_same_training_mode(self):
        r"""Test training mode updated on parametrization registration"""

        class Identity(nn.Module):
            def forward(self, X):
                return X

        module = nn.Linear(4, 4)
        module.eval()
        parametrize.register_parametrization(module, "weight", Identity())
        self.assertFalse(module.parametrizations.weight[0].training)
        module.train()
        parametrize.register_parametrization(module, "weight", Identity().eval())
        self.assertTrue(module.parametrizations.weight[0].training)
        self.assertTrue(module.parametrizations.weight[1].training)

    @swap([True, False])
    def test_type_before_parametrizations(self):
        r"""Test that type_before_parametrizations always retrieves original type"""

        class Identity(nn.Module):
            def forward(self, X):
                return X

        model = nn.Linear(5, 5)
        original_type = type(model)
        self.assertTrue(
            parametrize.type_before_parametrizations(model) == original_type
        )
        parametrize.register_parametrization(model, "weight", Identity())
        self.assertTrue(
            parametrize.type_before_parametrizations(model) == original_type
        )
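
        # (Registering a parametrization replaces the module's class with a
        # dynamically generated subclass, which is what
        # type_before_parametrizations looks through.)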

    @swap([True, False])
    def test_deepcopy_after_parametrization(self):
        r"""Test that we are able to create a deepcopy of the module when it's parametrized."""

        class AddOne(nn.Module):
            def forward(self, x):
                return x + 1.0

        class ModelWithoutDeepcopy(nn.Module):
            def __init__(self) -> None:
                super().__init__()
                self.weight = nn.Parameter(
                    torch.tensor([1.0, 1.0, 1.0, 1.0]), requires_grad=True
                )
                self.bias = nn.Parameter(
                    torch.tensor([0.0, 0.0, 0.0, 0.0]), requires_grad=True
                )
                self.attr = [1.0, 2.0, 3.0, 4.0]

        class ActualModel(ModelWithoutDeepcopy):
            # Emulate a custom implementation of deepcopying.
            def __deepcopy__(self, memo):
                result = self.__new__(self.__class__)
                memo[id(self)] = result
                result.__dict__ = deepcopy(self.__dict__, memo)
                return result

        def check_deepcopy(m1: nn.Module, m2: nn.Module):
            w1 = m1.parametrizations.weight.original
            w2 = m2.parametrizations.weight.original
            b1 = (
                m1.parametrizations.bias.original
                if parametrize.is_parametrized(m1, "bias")
                else m1.bias
            )
            b2 = (
                m2.parametrizations.bias.original
                if parametrize.is_parametrized(m2, "bias")
                else m2.bias
            )
            # Weights, biases and attributes should be equal but they must be different objects.
            self.assertEqual(m1.__dict__.keys(), m2.__dict__.keys())
            self.assertIsNot(m1, m2)
            self.assertEqual(w1, w2)
            self.assertIsNot(w1, w2)
            self.assertEqual(b1, b2)
            self.assertIsNot(b1, b2)
            self.assertEqual(m1.attr, m2.attr)
            self.assertIsNot(m1.attr, m2.attr)

        for model in (ModelWithoutDeepcopy(), ActualModel()):
            # General check that we are able to create a deepcopy.
            parametrize.register_parametrization(model, "weight", AddOne())
            check_deepcopy(model, deepcopy(model))
            # Check that this works on models with several parametrized tensors.
            parametrize.register_parametrization(model, "bias", AddOne())
            check_deepcopy(model, deepcopy(model))
            # Check that this works on models where tensors have more than one parametrization.
            parametrize.register_parametrization(model, "weight", AddOne())
            check_deepcopy(model, deepcopy(model))

    @swap([True, False])
    def test_transfer_parametrizations_and_params(self):
        r"""Test that all parametrizations and their associated parameters are transferred."""

        class AddOne(nn.Module):
            def forward(self, x):
                return x + 1.0

        class Double(nn.Module):
            def forward(self, x):
                return 2.0 * x

            def right_inverse(self, x):
                return 0.5 * x

        class MinusOne(nn.Module):
            def forward(self, x):
                return x - 1.0

        model = nn.Linear(5, 5)
        parametrize.register_parametrization(model, "weight", AddOne())
        parametrize.register_parametrization(model, "weight", Double())
        parametrize.register_parametrization(model, "weight", MinusOne())
        hold_weight = model.weight

        to_model = torch.ao.nn.qat.Linear(
            5, 5, qconfig=torch.ao.quantization.get_default_qconfig()
        )
        parametrize.transfer_parametrizations_and_params(model, to_model)

        # check that the final and original values are correct and that to_model is parametrized
        self.assertTrue(torch.nn.utils.parametrize.is_parametrized(to_model, "weight"))
        self.assertEqual(model.weight, to_model.weight)
        self.assertEqual(
            model.parametrizations.weight.original,
            to_model.parametrizations.weight.original,
        )

        # check that the transfer didn't affect the original value
        self.assertEqual(hold_weight, model.weight)
        if get_swap_module_params_on_conversion():
            # When using the swap_tensors path, this is needed so that the autograd
            # graph is not alive anymore.
            del hold_weight

        # testing that changes to one set of parametrizations do not affect the other
        parametrize.remove_parametrizations(to_model, "weight")
        self.assertFalse(torch.nn.utils.parametrize.is_parametrized(to_model, "weight"))
        self.assertTrue(torch.nn.utils.parametrize.is_parametrized(model, "weight"))

        # also test that parameters that don't exist in to_model get transferred
        model.test_param = Parameter(torch.randn(5, 5))

        self.assertTrue(not hasattr(to_model, "test_param"))
        parametrize.register_parametrization(model, "test_param", Double())
        hold_test_param = model.test_param
        parametrize.transfer_parametrizations_and_params(model, to_model, "test_param")

        # check that previously missing params got transferred correctly
        self.assertEqual(model.test_param, to_model.test_param)
        self.assertEqual(
            model.parametrizations.test_param.original,
            to_model.parametrizations.test_param.original,
        )

        # check that the new transfer didn't change the value for the from_module
        self.assertEqual(hold_test_param, model.test_param)

    @swap([True, False])
    def test_transfer_parametrizations_and_params_right_inverse(self):
        r"""Test that transfer works for a parametrization with a right_inverse."""

        class Double(nn.Module):
            def forward(self, x):
                return 2.0 * x

            def right_inverse(self, x):
                return 0.5 * x

        model = nn.Linear(5, 5)
        parametrize.register_parametrization(model, "weight", Double())
        hold_weight = model.weight

        to_model = torch.ao.nn.qat.Linear(
            5, 5, qconfig=torch.ao.quantization.get_default_qconfig()
        )
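        # The transfer should respect right_inverse: to_model must end up with
        # the same parametrized value and the same (halved) original parameter.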
        parametrize.transfer_parametrizations_and_params(model, to_model)

        # check that transfer occurs successfully
        self.assertEqual(model.weight, to_model.weight)
        self.assertEqual(
            model.parametrizations.weight.original,
            to_model.parametrizations.weight.original,
        )

        # check that transfer doesn't affect the from_model weight
        self.assertEqual(hold_weight, model.weight)

    @swap([True, False])
    def test_transfer_parametrizations_and_params_single_param(self):
        r"""Test that only the specified parametrizations and parameters are transferred."""

        class AddOne(nn.Module):
            def forward(self, x):
                return x + 1.0

        class Double(nn.Module):
            def forward(self, x):
                return 2.0 * x

        class MinusOne(nn.Module):
            def forward(self, x):
                return x - 1.0

        model = nn.Linear(5, 5, bias=True)
        parametrize.register_parametrization(model, "weight", AddOne())
        parametrize.register_parametrization(model, "weight", Double())
        parametrize.register_parametrization(model, "weight", MinusOne())
        parametrize.register_parametrization(model, "bias", AddOne())
        parametrize.register_parametrization(model, "bias", Double())
        parametrize.register_parametrization(model, "bias", MinusOne())

        to_model = torch.ao.nn.qat.Linear(
            5, 5, bias=True, qconfig=torch.ao.quantization.get_default_qconfig()
        )
        parametrize.transfer_parametrizations_and_params(model, to_model, "weight")

        # check that weight and only weight was transferred
        self.assertEqual(model.weight, to_model.weight)
        self.assertEqual(
            model.parametrizations.weight.original,
            to_model.parametrizations.weight.original,
        )
        self.assertTrue("bias" not in to_model.parametrizations)

    # FIXME: Rewrite this test using functions not depending on LAPACK
    # and remove the `@skipIfNoLapack` (see #70995)
    @skipIfNoLapack
    @swap([True, False])
    def test_transfer_parametrizations_and_params_many_to_one(self):
        # A parametrization with several outputs
        class RankOne(nn.Module):
            def forward(self, x, y):
                # Form a rank-1 matrix from a pair of vectors
                return x.unsqueeze(-1) @ y.unsqueeze(-2)

            def right_inverse(self, Y):
                # We project the given matrix onto the rank-1 matrices
                U, S, Vh = torch.linalg.svd(Y, full_matrices=False)
                # S is sorted in descending order.
                s0_sqrt = S[0].sqrt().unsqueeze(-1)
                return U[..., :, 0] * s0_sqrt, Vh[..., 0, :] * s0_sqrt

        class Double(nn.Module):
            def forward(self, x):
                return 2.0 * x

        model = nn.Linear(3, 3)
        parametrize.register_parametrization(model, "weight", RankOne())
        parametrize.register_parametrization(model, "weight", Double())
        hold_weight = model.weight

        to_model = torch.ao.nn.qat.Linear(
            3, 3, qconfig=torch.ao.quantization.get_default_qconfig()
        )

        parametrize.transfer_parametrizations_and_params(model, to_model)

        # check that the final and original values are correct and that to_model is parametrized
        self.assertTrue(torch.nn.utils.parametrize.is_parametrized(to_model, "weight"))
        self.assertEqual(model.weight, to_model.weight)
        self.assertEqual(
            model.parametrizations.weight.original0,
            to_model.parametrizations.weight.original0,
        )
        self.assertEqual(
            model.parametrizations.weight.original1,
            to_model.parametrizations.weight.original1,
        )

        # check that the transfer didn't affect the original value
        self.assertEqual(hold_weight, model.weight)

        # testing that changes to one set of parametrizations do not affect the other
        model.test_param = Parameter(torch.randn(3, 3))

        self.assertTrue(not hasattr(to_model, "test_param"))
        parametrize.register_parametrization(model, "test_param", RankOne())
        hold_test_param = model.test_param
        parametrize.transfer_parametrizations_and_params(model, to_model, "test_param")

        # also check that previously missing params got transferred correctly
        self.assertEqual(model.test_param, to_model.test_param)
        self.assertEqual(
            model.parametrizations.test_param.original0,
            to_model.parametrizations.test_param.original0,
        )
        self.assertEqual(
            model.parametrizations.test_param.original1,
            to_model.parametrizations.test_param.original1,
        )

        # check that the new transfer didn't change the value for the from_module
        self.assertEqual(hold_test_param, model.test_param)

    @swap([True, False])
    def test_new_spectral_norm(self):
        with set_default_dtype(torch.double):
            input = torch.randn(3, 5)
            m = nn.Linear(5, 7)
            m = torch.nn.utils.parametrizations.spectral_norm(m)
            spectral_norm_m = m.parametrizations.weight[0]

            self.assertEqual(spectral_norm_m._u.size(), torch.Size([m.weight.size(0)]))

            # .parametrizations.weight.original should be trainable
            self.assertTrue(hasattr(m.parametrizations.weight, "original"))
            self.assertTrue("original" in m.parametrizations.weight._parameters)
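
            # Spectral norm reparametrizes weight as original / sigma, where
            # sigma is the largest singular value of original, estimated by
            # power iteration on the two buffers checked below.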

            # u should be just a reused buffer
            self.assertTrue(hasattr(spectral_norm_m, "_u"))
            self.assertTrue("_u" in spectral_norm_m._buffers)
            self.assertTrue("_v" in spectral_norm_m._buffers)

            # weight should be a plain attribute, not counted as a buffer or a param
            self.assertIsNotNone(m.weight)
            self.assertFalse("weight" in m._buffers)
            self.assertFalse("weight" in m._parameters)

            # it should also be sharing storage with `parametrizations.weight.original`
            # self.assertEqual(m.parametrizations.weight.original.storage(), m.weight.storage())
            self.assertEqual(m.parametrizations.weight.original.size(), m.weight.size())
            self.assertEqual(
                m.parametrizations.weight.original.stride(), m.weight.stride()
            )

            m = torch.nn.utils.parametrize.remove_parametrizations(m, "weight")

            # spectral_norm is the only parametrization
            self.assertFalse(hasattr(m, "parametrizations"))
            self.assertTrue("weight" in m._parameters)

            # We can register spectral_norm multiple times on the same parameter
            # and on multiple parameters in the same module
            m = torch.nn.utils.parametrizations.spectral_norm(m, "weight")
            m = torch.nn.utils.parametrizations.spectral_norm(m, "weight")
            m = torch.nn.utils.parametrizations.spectral_norm(m, "bias")

            # If we remove the parametrization on bias, weight is still parametrized
            # Removing a parametrization runs forward in eval mode if leave_parametrized=True
            m = torch.nn.utils.parametrize.remove_parametrizations(m, "bias")
            self.assertTrue("bias" in m._parameters)
            self.assertTrue(hasattr(m, "parametrizations"))
            self.assertFalse("weight" in m._parameters)

            m = torch.nn.utils.parametrize.remove_parametrizations(m, "weight")
            # Neither weight nor bias is parametrized
            self.assertFalse(hasattr(m, "parametrizations"))
            self.assertTrue("weight" in m._parameters)
            self.assertFalse(torch.nn.utils.parametrize.is_parametrized(m))

            # test correctness in training/eval modes and cpu/multi-gpu settings
            for apply_dp in (True, False):
                if apply_dp:
                    if not TEST_MULTIGPU:
                        continue
                    device = torch.device("cuda:0")

                    def maybe_wrap(m):
                        return torch.nn.DataParallel(m, [0, 1])

                else:
                    device = torch.device("cpu")

                    def maybe_wrap(m):
                        return m

                for requires_grad in (True, False):

                    def get_modules():
                        m = nn.Linear(3, 4).to(device)
                        m.weight.requires_grad_(requires_grad)
                        m = torch.nn.utils.parametrizations.spectral_norm(m)
                        wrapped_m = maybe_wrap(m)
                        spectral_norm_m = m.parametrizations.weight[0]
                        return m, wrapped_m, spectral_norm_m

                    input = torch.randn(2, 3, device=device)

                    m, wrapped_m, spectral_norm_m = get_modules()

                    self.assertTrue(hasattr(spectral_norm_m, "_u"))
                    u0 = spectral_norm_m._u.clone()
                    v0 = spectral_norm_m._v.clone()

                    # TEST TRAINING BEHAVIOR

                    # We perform GD first to modify the initial matrix
                    opt = torch.optim.SGD(wrapped_m.parameters(), lr=0.1)

                    opt.zero_grad()
                    wrapped_m(input).sum().backward()
                    opt.step()

                    out = wrapped_m(input)
                    if requires_grad:
                        # run forward again and assert that u and v are updated
                        self.assertNotEqual(u0, spectral_norm_m._u)
                        self.assertNotEqual(v0, spectral_norm_m._v)
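                        # (Every forward pass in training mode performs one
                        # in-place power-iteration step on _u and _v, which is
                        # why they must have changed.)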

                    # assert that backprop reaches the original weight
                    # can't use gradcheck because the function changes as we
                    # activate through it in training mode
                    if requires_grad:
                        torch.autograd.grad(
                            out.sum(), m.parametrizations.weight.original
                        )

                    # test backward works with multiple forwards
                    # it uses training mode so we need to reset `u` and `v` vectors
                    # to the same value at the beginning for the finite difference test to pass
                    saved_u = spectral_norm_m._u.clone()
                    saved_v = spectral_norm_m._v.clone()

                    def fn(input):
                        spectral_norm_m._u.data.copy_(saved_u)
                        spectral_norm_m._v.data.copy_(saved_v)
                        out0 = wrapped_m(input)
                        out1 = wrapped_m(input)
                        return out0 + out1

                    # Make sure we can compute gradients wrt all the parameters in the case
                    # of double forward
                    fn(input.clone().requires_grad_()).sum().backward()
                    gradcheck(
                        fn, (input.clone().requires_grad_(),), check_batched_grad=False
                    )

                    # test removing
                    # spectral norm module needs to be in eval mode if we'd like to
                    # avoid doing another power iteration
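                    # (With leave_parametrized=True, removal materializes the
                    # weight with one more forward; in eval mode _u/_v stay
                    # frozen, so outputs before and after removal must match.)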
                    m, wrapped_m, _ = get_modules()
                    pre_remove_out = wrapped_m(input)
                    if get_swap_module_params_on_conversion():
                        # When using the swap_tensors path, this is needed so that the autograd
                        # graph is not alive anymore.
                        pre_remove_out_ref = pre_remove_out.detach()
                        del pre_remove_out
                    else:
                        pre_remove_out_ref = pre_remove_out
                    m.eval()
                    m = torch.nn.utils.parametrize.remove_parametrizations(m, "weight")
                    self.assertEqual(wrapped_m(input), pre_remove_out_ref)

                    torch.nn.utils.parametrizations.spectral_norm(m)
                    for _ in range(3):
                        pre_remove_out = wrapped_m(input)
                    if get_swap_module_params_on_conversion():
                        # When using the swap_tensors path, this is needed so that the autograd
                        # graph is not alive anymore.
                        pre_remove_out_ref = pre_remove_out.detach()
                        del pre_remove_out
                    else:
                        pre_remove_out_ref = pre_remove_out
                    m.eval()
                    m = torch.nn.utils.parametrize.remove_parametrizations(m, "weight")
                    self.assertEqual(wrapped_m(input), pre_remove_out_ref)

                    # TEST EVAL BEHAVIOR
                    m, wrapped_m, spectral_norm_m = get_modules()
                    wrapped_m(input)
                    last_train_out = wrapped_m(input)
                    last_train_u = spectral_norm_m._u.clone()
                    last_train_v = spectral_norm_m._v.clone()
                    wrapped_m.zero_grad()
                    wrapped_m.eval()

                    eval_out0 = wrapped_m(input)
                    # assert eval gives same result as last training iteration
                    self.assertEqual(eval_out0, last_train_out)
                    # assert that more iterations in eval mode don't change things
                    self.assertEqual(eval_out0, wrapped_m(input))
                    self.assertEqual(last_train_u, spectral_norm_m._u)
                    self.assertEqual(last_train_v, spectral_norm_m._v)

                    # FIXME: the code below is flaky when executed with DataParallel
                    # see https://github.com/pytorch/pytorch/issues/13818
                    if apply_dp:
                        continue

                    # test backward works with multiple forwards in mixed training
                    # and eval modes
                    # it uses training mode so we need to reset `u` and `v` vectors
                    # to the same value at the beginning for the finite difference test to pass
                    saved_u = spectral_norm_m._u.clone()
                    saved_v = spectral_norm_m._v.clone()

                    def fn(input):
                        spectral_norm_m._u.data.copy_(saved_u)
                        spectral_norm_m._v.data.copy_(saved_v)
                        wrapped_m.train()
                        out0 = wrapped_m(input)
                        wrapped_m.eval()
                        out1 = wrapped_m(input)
                        wrapped_m.train()
                        out2 = wrapped_m(input)
                        wrapped_m.eval()
                        out3 = wrapped_m(input)
                        return out0 + out1 + out2 + out3

                    gradcheck(fn, (input.clone().requires_grad_(),))

                    # assert that backprop reaches the original weight in eval
                    if requires_grad:

                        def fn(weight):
                            return wrapped_m(input)

                        gradcheck(fn, (m.parametrizations.weight.original,))

    def test_register_parametrization_no_grad(self):
        r"""Test that it is possible to register a parametrization without gradient"""

        class SplitAndCat(nn.Module):
            def right_inverse(self, x):
                # split the tensor into two halves
                return torch.split(x, x.shape[1] // 2)

            def forward(self, x0, x1):
                return torch.cat([x0, x1])

        model = nn.Linear(8, 8)

        model.weight.requires_grad = False
        parametrize.register_parametrization(model, "weight", SplitAndCat())
        # making sure the parametrized and decomposed tensors both have requires_grad == False
        self.assertFalse(model.weight.requires_grad)
        self.assertFalse(model.parametrizations.weight.original0.requires_grad)
        self.assertFalse(model.parametrizations.weight.original1.requires_grad)

    @swap([True, False])
    def test_new_spectral_norm_load_state_dict(self):
        for activate_times in (0, 3):
            inp = torch.randn(2, 3)
            m = nn.Linear(3, 5)
            snm = torch.nn.utils.parametrizations.spectral_norm(m)
            snm.train()

            for _ in range(activate_times):
                snm(inp)

            state_dict = deepcopy(snm.state_dict())
            self.assertEqual(
                {
                    "parametrizations.weight.original",
                    "bias",
                    "parametrizations.weight.0._v",
                    "parametrizations.weight.0._u",
                },
                set(state_dict.keys()),
            )
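
            # The learnable tensor is stored as parametrizations.weight.original,
            # while the spectral norm module itself contributes its _u/_v
            # buffers under the parametrizations.weight.0 prefix.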
            snm.load_state_dict(non_strict_state_dict, strict=False)
            del non_strict_state_dict["weight"]  # remove W buffer
            snm.load_state_dict(non_strict_state_dict, strict=False)
            del non_strict_state_dict["bias"]
            snm.load_state_dict(non_strict_state_dict, strict=False)

            # normal state_dict

            # test that re-wrapping does not matter
            m = torch.nn.utils.parametrize.remove_parametrizations(snm, "weight")
            snm = torch.nn.utils.parametrizations.spectral_norm(m)

            snm.load_state_dict(state_dict)
            with torch.no_grad():
                snm.eval()
                out0_eval = snm(inp)
                snm.train()
                out1_train = snm(inp)
                out2_train = snm(inp)
                snm.eval()
                out3_eval = snm(inp)

            # test that re-wrapping does not matter
            m = torch.nn.utils.parametrize.remove_parametrizations(snm, "weight")
            snm = torch.nn.utils.parametrizations.spectral_norm(m)

            # Test normal loading
            snm.load_state_dict(state_dict)
            with torch.no_grad():
                snm.eval()
                self.assertEqual(out0_eval, snm(inp))
                snm.train()
                self.assertEqual(out1_train, snm(inp))
                self.assertEqual(out2_train, snm(inp))
                snm.eval()
                self.assertEqual(out3_eval, snm(inp))

    @swap([True, False])
    def test_new_spectral_norm_dim(self):
        inp = torch.randn(2, 3, 10, 12)
        m = nn.ConvTranspose2d(3, 4, (5, 6))
        m = torch.nn.utils.parametrizations.spectral_norm(m)
        snm = m.parametrizations.weight[0]
        # this should not run into incompatible shapes
        x = m(inp)
        # check that u refers to the same dimension
        self.assertEqual(
            snm._u.shape, m.parametrizations.weight.original[0, :, 0, 0].shape
        )

    @swap([True, False])
    def test_new_spectral_norm_forward(self):
        input = torch.randn(3, 5)
        m = nn.Linear(5, 7)
        m = torch.nn.utils.parametrizations.spectral_norm(m)
        snm = m.parametrizations.weight[0]
        # naive forward: one power-iteration step on W, then rescale by sigma = u^T W v
        _weight = m.parametrizations.weight.original
        _bias, _v = m.bias, snm._v
        _weight_mat = _weight.view(_weight.size(0), -1)
        _u = torch.mv(_weight_mat, _v)
        _u = F.normalize(_u, dim=0, eps=1e-12)
        _v = torch.mv(_weight_mat.t(), _u)
        _v = F.normalize(_v, dim=0, eps=1e-12)
        _weight.data /= torch.dot(_u, torch.matmul(_weight_mat, _v))
        out_hat = torch.nn.functional.linear(input, _weight, _bias)
        expect_out = m(input)
        self.assertEqual(expect_out, out_hat)

    @swap([True, False])
    @skipIfTorchDynamo("Test does not work with TorchDynamo")
    def test_new_spectral_norm_value(self):
        # Check that the spectral norm (= top singular value) is in fact computed
        # correctly, using a simple diagonal matrix as the example.
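        # For a diagonal weight D = diag(d_1, ..., d_n) the singular values are
        # |d_i|, so the spectral norm is max_i |d_i| and the parametrized weight
        # should come out as D / max_i |d_i|.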
        for dtype in (torch.float, torch.cfloat):
            m = nn.Linear(2, 2, dtype=dtype)
            with torch.no_grad():
                # set weight to be diagonal
                x = torch.diagonal(m.weight)
                m.weight = nn.Parameter(torch.diag(x))
                torch.nn.utils.parametrizations.spectral_norm(m)
                # the weight should be rescaled by the spectral norm,
                # i.e., by the largest diagonal element in absolute value
                expected = torch.diag(x / x.abs().max())
                self.assertEqual(m.weight.data, expected)

    @skipIfNoLapack
    @swap([True, False])
    def test_orthogonal_parametrization(self):
        # Orthogonal implements 6 algorithms (3 parametrizations x 2 choices of use_trivialization)

        def assert_is_orthogonal(X):
            n, k = X.size(-2), X.size(-1)
            if n < k:
                X = X.mT
                n, k = k, n
            Id = torch.eye(k, dtype=X.dtype, device=X.device).expand(
                *(X.size()[:-2]), k, k
            )
            eps = 10 * n * torch.finfo(X.dtype).eps
            torch.testing.assert_close(X.mH @ X, Id, atol=eps, rtol=0.0)

        def assert_weight_allclose_Q(weight, W):
            # Test that weight is equal to the Q part of the QR decomposition of W
            # (or of its transpose if the matrix is wide)
            wide_matrix = W.size(-2) < W.size(-1)
            if wide_matrix:
                W = W.mT
            Q, R = torch.linalg.qr(W)
            Q *= R.diagonal(dim1=-2, dim2=-1).sgn().unsqueeze(-2)
            if wide_matrix:
                Q = Q.mT
            torch.testing.assert_close(Q, weight, atol=1e-5, rtol=0.0)

        for shape, dtype, use_linear in product(
            ((4, 4), (5, 3), (3, 5)),  # square / tall / wide
            (torch.float32, torch.complex64),
            (True, False),
        ):
            # Conv2d does not support complex yet
            if not use_linear:
                continue

            if use_linear:
                input = torch.randn(3, shape[0], dtype=dtype)
            else:
                input = torch.randn(2, 2, shape[0] + 2, shape[1] + 1, dtype=dtype)

            for parametrization, use_trivialization in product(
                ("matrix_exp", "cayley", "householder"), (False, True)
            ):
                # right_inverse for Cayley and matrix_exp is not implemented for
                # use_trivialization=False. See Note [right_inverse expm cayley]
                can_initialize = use_trivialization or parametrization == "householder"

                # We regenerate the module every time so as to always start with fresh weights
                if use_linear:
                    m = nn.Linear(*shape, dtype=dtype)
                else:
                    m = nn.Conv2d(2, 3, shape, dtype=dtype)

                # We do not support householder for complex inputs
                # See Note [Householder complex]

                # When using the swap_tensors path, this is needed so that the autograd
                # graph is not alive anymore.
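                # (clone().detach() yields a tensor with no grad_fn, whereas a plain
                # clone() would keep w_init attached to m.weight's autograd graph.)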
                if get_swap_module_params_on_conversion():
                    w_init = m.weight.clone().detach()
                else:
                    w_init = m.weight.clone()
                if parametrization == "householder" and m.weight.is_complex():
                    msg = "householder parametrization does not support complex tensors"
                    with self.assertRaisesRegex(ValueError, msg):
                        torch.nn.utils.parametrizations.orthogonal(
                            m,
                            "weight",
                            parametrization,
                            use_trivialization=use_trivialization,
                        )
                    continue

                wide_matrix = w_init.size(-2) < w_init.size(-1)
                torch.nn.utils.parametrizations.orthogonal(
                    m, "weight", parametrization, use_trivialization=use_trivialization
                )
                # Forward works as expected
                self.assertEqual(w_init.shape, m.weight.shape)
                assert_is_orthogonal(m.weight)
                if can_initialize:
                    assert_weight_allclose_Q(m.weight, w_init)

                # Initializing with a given orthogonal matrix works
                X = torch.randn_like(m.weight)
                if wide_matrix:
                    X = X.mT
                w_new = torch.linalg.qr(X).Q
                if wide_matrix:
                    w_new = w_new.mT
                if can_initialize:
                    m.weight = w_new
                    torch.testing.assert_close(w_new, m.weight, atol=1e-5, rtol=0.0)
                else:
                    msg = (
                        "assign to the matrix exponential or the Cayley parametrization"
                    )
                    with self.assertRaisesRegex(NotImplementedError, msg):
                        m.weight = w_new

                # Initializing with a non-orthogonal matrix makes m.weight be the Q part
                # of the given matrix
                w_new = torch.randn_like(m.weight)
                if can_initialize:
                    m.weight = w_new
                    assert_weight_allclose_Q(m.weight, w_new)
                else:
                    msg = (
                        "assign to the matrix exponential or the Cayley parametrization"
                    )
                    with self.assertRaisesRegex(NotImplementedError, msg):
                        m.weight = w_new

                opt = torch.optim.SGD(m.parameters(), lr=0.1)
                for _ in range(2):
                    opt.zero_grad()
                    m(input).norm().backward()
                    grad = m.parametrizations.weight.original.grad
                    self.assertIsNotNone(grad)
                    # We do not update the upper triangular part of the matrix if tall
                    # (nor the lower triangular part if wide)
                    if grad.size(-2) >= grad.size(-1):
                        zeros_grad = grad.triu(1)
                    else:
                        zeros_grad = grad.tril(-1)
                    self.assertEqual(zeros_grad, torch.zeros_like(zeros_grad))
                    # The gradient in the diagonal can only be imaginary because a
                    # skew-Hermitian matrix has an imaginary diagonal
                    diag_grad = grad.diagonal(dim1=-2, dim2=-1)
                    if grad.is_complex():
                        diag_grad = diag_grad.real
                    self.assertEqual(diag_grad, torch.zeros_like(diag_grad))
                    opt.step()
                    assert_is_orthogonal(m.weight)

    @skipIfNoLapack
    @swap([True, False])
    def test_orthogonal_errors(self):
        m = nn.Linear(3, 4)
        with self.assertRaisesRegex(ValueError, "has to be one of"):
            torch.nn.utils.parametrizations.orthogonal(m, "weight", "foo")

        with self.assertRaisesRegex(ValueError, "Expected a matrix"):
            torch.nn.utils.parametrizations.orthogonal(m, "bias")

        torch.nn.utils.parametrizations.orthogonal(m, "weight")
        with self.assertRaisesRegex(ValueError, "matrices of shape"):
            m.weight = torch.randn(5, 5)
        torch.nn.utils.parametrize.remove_parametrizations(m, "weight")

    @swap([True, False])
    def test_weight_norm_state_dict_compat(self):
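        # A state dict saved from the legacy torch.nn.utils.weight_norm (weight_g /
        # weight_v keys) should load into a module wrapped with the parametrization-based
        # weight_norm; the remapping onto original0 / original1 is assumed to be handled
        # by a load_state_dict pre-hook registered by the parametrization.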
        m = nn.Linear(4, 5)
        m = torch.nn.utils.weight_norm(m)
        old_dict = m.state_dict()

        m2 = nn.Linear(4, 5)
        m2 = torch.nn.utils.parametrizations.weight_norm(m2)
        m2.load_state_dict(old_dict)

        input = torch.randn(3, 4)
        self.assertEqual(m(input), m2(input))

    @swap([True, False])
    def test_weight_norm_pickle(self):
        m = nn.Linear(4, 5)
        m = torch.nn.utils.parametrizations.weight_norm(m)
        with self.assertRaisesRegex(RuntimeError, "state_dict"):
            pickle.dumps(m)

    @swap([True, False])
    def test_weight_norm_deepcopy(self):
        m = nn.Linear(4, 5)
        m = torch.nn.utils.parametrizations.weight_norm(m)
        m2 = deepcopy(m)
        input = torch.randn(3, 4)
        self.assertEqual(m(input), m2(input))

    @swap([True])
    def test_wrapper_subclass_parametrization(self):
        class Subclassify(nn.Module):
            def forward(self, X):
                return TwoTensor(X, X)

        class UnSubclassify(nn.Module):
            def forward(self, X):
                return X.a

        class IdentityWithRightInverse(nn.Module):
            def forward(self, X):
                return X

            def right_inverse(self, X):
                return TwoTensor(X, X)

        def _check_parametrization(
            parametrization,
            type_before_registration,
            type_after_registration,
            leave_parametrized=False,
            type_after_right_inverse=None,
        ):
            model = nn.Linear(2, 2)
            buf = torch.randn(2, 2)
            model.buf = torch.nn.Buffer(buf)
            if (
                type_before_registration == TwoTensor
                and type_after_registration == Tensor
            ):
                model._apply(lambda t: TwoTensor(t, t))
            initial_weight = model.weight.clone().detach()
            initial_weight_id = id(model.weight)
            initial_buf = model.buf.clone().detach()
            initial_buf_id = id(model.buf)
            type_original_weight = (
                type_before_registration
                if type_after_right_inverse is None
                else type_after_right_inverse
            )
            type_original_buf = (
                Tensor if type_original_weight is nn.Parameter else type_original_weight
            )
            type_after_removal_buf = (
                type_after_registration if leave_parametrized else type_original_buf
            )
            if leave_parametrized:
                if type_after_registration is Tensor:
                    type_after_removal_weight = nn.Parameter
                else:
                    type_after_removal_weight = type_after_registration
            else:
                type_after_removal_weight = type_original_weight

            parametrize.register_parametrization(model, "weight", parametrization())
            parametrize.register_parametrization(model, "buf", parametrization())
            self.assertTrue(hasattr(model, "parametrizations"))
            self.assertTrue(parametrize.is_parametrized(model))
            self.assertFalse(parametrize.is_parametrized(model, "bias"))
            # checks for weight
            self.assertTrue(parametrize.is_parametrized(model, "weight"))
            self.assertTrue(
                isinstance(model.parametrizations.weight.original, nn.Parameter)
            )
            self.assertTrue(
                type(model.parametrizations.weight.original) is type_original_weight
            )
            self.assertNotIn("weight", model._parameters)
            self.assertTrue(type(model.weight) is type_after_registration)
            # checks for buf
            self.assertTrue(parametrize.is_parametrized(model, "buf"))
            self.assertFalse(
                isinstance(model.parametrizations.buf.original, nn.Parameter)
            )
            self.assertTrue(
                type(model.parametrizations.buf.original) is type_original_buf
            )
            self.assertTrue(type(model.buf) is type_after_registration)
            parametrize.remove_parametrizations(
                model, "weight", leave_parametrized=leave_parametrized
            )
            parametrize.remove_parametrizations(
                model, "buf", leave_parametrized=leave_parametrized
            )
            self.assertFalse(hasattr(model, "parametrizations"))
            self.assertEqual(model.__class__, nn.Linear)
            # checks for weight
            self.assertTrue(type(model.weight) is type_after_removal_weight)
            self.assertTrue(isinstance(model.weight, nn.Parameter))
            self.assertEqual(id(model.weight), initial_weight_id)
            # checks for buf
            self.assertTrue(type(model.buf) is type_after_removal_buf)
            self.assertFalse(isinstance(model.buf, nn.Parameter))
            self.assertEqual(id(model.buf), initial_buf_id)
            if not leave_parametrized and type_after_right_inverse is None:
                self.assertEqual(model.weight, initial_weight)
                self.assertEqual(model.buf, initial_buf)

        _check_parametrization(Subclassify, nn.Parameter, TwoTensor)
        _check_parametrization(UnSubclassify, TwoTensor, Tensor)
        _check_parametrization(
            IdentityWithRightInverse,
            nn.Parameter,
            TwoTensor,
            type_after_right_inverse=TwoTensor,
        )
        _check_parametrization(
            Subclassify, nn.Parameter, TwoTensor, leave_parametrized=True
        )
        _check_parametrization(
            UnSubclassify, TwoTensor, Tensor, leave_parametrized=True
        )
        _check_parametrization(
            IdentityWithRightInverse,
            nn.Parameter,
            TwoTensor,
            leave_parametrized=True,
            type_after_right_inverse=TwoTensor,
        )


class TestNNParametrizationDevice(NNTestCase):
    @swap([True, False])
    def test_weight_norm_parametrization(self, device):
        for dtype in [torch.float, torch.bfloat16]:
            input = torch.randn(3, 4, dtype=dtype, device=device)
            m = nn.Linear(4, 5, dtype=dtype, device=device)
            expected_output = m(input)

            # add weight normalization
            m = torch.nn.utils.parametrizations.weight_norm(m)
            self.assertEqual(
                m.parametrizations.weight.original1.size(), m.weight.size()
            )
            self.assertEqual(m.parametrizations.weight.original0.size(), (5, 1))
            self.assertEqual(m(input), expected_output)

            # remove weight norm
            torch.nn.utils.parametrize.remove_parametrizations(m, "weight")
            self.assertFalse(hasattr(m, "parametrizations"))
            self.assertEqual(m(input), expected_output)

            # test with dim=1
            m = torch.nn.utils.parametrizations.weight_norm(m, dim=1)
            self.assertEqual(
                m.parametrizations.weight.original1.size(), m.weight.size()
            )
            self.assertEqual(m.parametrizations.weight.original0.size(), (1, 4))
            self.assertEqual(m(input), expected_output)

            # test with dim=None
            m = nn.Linear(4, 5, dtype=dtype, device=device)
            expected_output = m(input)
            m = torch.nn.utils.parametrizations.weight_norm(m, dim=None)
            self.assertEqual(m(input), expected_output)


only_for = ("cpu", "cuda")
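# instantiate_device_type_tests generates a per-device subclass for each backend in
# only_for (e.g. a CPU and a CUDA variant), so each test above runs once per device.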
instantiate_device_type_tests(TestNNParametrizationDevice, globals(), only_for=only_for)
instantiate_parametrized_tests(TestNNParametrization)

if __name__ == "__main__":
    run_tests()
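# Invoking this file directly runs the whole suite through run_tests(); a subset can
# typically be selected with unittest-style filtering, e.g. `-k spectral_norm`
# (assuming the standard PyTorch test harness semantics).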