from collections import OrderedDict

import torch
from torch._C import _disabled_torch_function_impl


# Metaclass to combine _TensorMeta and the instance check override for Parameter.
class _ParameterMeta(torch._C._TensorMeta):
    # Make `isinstance(t, Parameter)` return True for custom tensor instances that have the _is_param flag.
    def __instancecheck__(self, instance):
        if self is Parameter:
            if isinstance(instance, torch.Tensor) and getattr(
                instance, "_is_param", False
            ):
                return True
        # Otherwise defer to the standard metaclass check (true subclasses).
        return super().__instancecheck__(instance)


class Parameter(torch.Tensor, metaclass=_ParameterMeta):
    r"""A kind of Tensor that is to be considered a module parameter.

    Parameters are :class:`~torch.Tensor` subclasses, that have a
    very special property when used with :class:`Module` s - when they're
    assigned as Module attributes they are automatically added to the list of
    its parameters, and will appear e.g. in :meth:`~Module.parameters` iterator.
    Assigning a Tensor doesn't have such effect. This is because one might
    want to cache some temporary state, like last hidden state of the RNN, in
    the model. If there was no such class as :class:`Parameter`, these
    temporaries would get registered too.

    Args:
        data (Tensor): parameter tensor.
        requires_grad (bool, optional): if the parameter requires gradient. Note that
            the torch.no_grad() context does NOT affect the default behavior of
            Parameter creation--the Parameter will still have `requires_grad=True` in
            :class:`~no_grad` mode. See :ref:`locally-disable-grad-doc` for more
            details. Default: `True`
    """

    def __new__(cls, data=None, requires_grad=True):
        # Default to an empty placeholder tensor when no data is given.
        if data is None:
            data = torch.empty(0)
        if type(data) is torch.Tensor or type(data) is Parameter:
            # For ease of BC maintenance, keep this path for standard Tensor.
            # Eventually (tm), we should change the behavior for standard Tensor to match.
            return torch.Tensor._make_subclass(cls, data, requires_grad)

        # Path for custom tensors: set a flag on the instance to indicate parameter-ness.
        # The result keeps the custom tensor type; isinstance(t, Parameter) is made
        # true via _ParameterMeta.__instancecheck__ reading the _is_param flag.
        t = data.detach().requires_grad_(requires_grad)
        if type(t) is not type(data):
            raise RuntimeError(
                f"Creating a Parameter from an instance of type {type(data).__name__} "
                "requires that detach() returns an instance of the same type, but return "
                f"type {type(t).__name__} was found instead. To use the type as a "
                "Parameter, please correct the detach() semantics defined by "
                "its __torch_dispatch__() implementation."
            )
        t._is_param = True
        return t

    # Note: the 3 methods below only apply to standard Tensor. Parameters of custom tensor types
    # are still considered that custom tensor type and these methods will not be called for them.
    def __deepcopy__(self, memo):
        # Return the previously created copy when this object was already
        # deep-copied in the same operation, preserving aliasing.
        if id(self) in memo:
            return memo[id(self)]
        else:
            result = type(self)(
                self.data.clone(memory_format=torch.preserve_format), self.requires_grad
            )
            memo[id(self)] = result
            return result

    def __repr__(self):
        return "Parameter containing:\n" + super().__repr__()

    def __reduce_ex__(self, proto):
        # Pickle support: rebuild via torch._utils helpers; extra object state
        # (e.g. attributes set on the instance) is serialized separately.
        state = torch._utils._get_obj_state(self)

        # See Note [Don't serialize hooks]
        hooks = OrderedDict()
        if not state:
            return (
                torch._utils._rebuild_parameter,
                (self.data, self.requires_grad, hooks),
            )

        return (
            torch._utils._rebuild_parameter_with_state,
            (self.data, self.requires_grad, hooks, state),
        )

    # Parameters do not override torch functions; dispatch behaves as for Tensor.
    __torch_function__ = _disabled_torch_function_impl
class UninitializedTensorMixin:
    # Tensor methods that are safe to call before materialization; any other
    # torch function on an uninitialized tensor raises in __torch_function__.
    _allowed_methods = [
        torch.Tensor.__hash__,
        torch.Tensor.size,
        torch.Tensor.copy_,
        torch.Tensor.is_complex,
        torch.Tensor.is_floating_point,
        torch.Tensor.half,
        torch.Tensor.float,
        torch.Tensor.double,
        torch.Tensor.char,
        torch.Tensor.short,
        torch.Tensor.int,
        torch.Tensor.long,
        torch.Tensor.cuda,
        torch.Tensor.cpu,
        torch.Tensor.to,
        torch.Tensor.get_device,
        torch._has_compatible_shallow_copy_type,
    ]

    def materialize(self, shape, device=None, dtype=None):
        r"""Create a Parameter or Tensor with the same properties of the uninitialized one.

        Given a shape, it materializes a parameter in the same device
        and with the same `dtype` as the current one or the specified ones in the
        arguments.

        Args:
            shape (tuple): the shape for the materialized tensor.
            device (:class:`torch.device`): the desired device of the parameters
                and buffers in this module. Optional.
            dtype (:class:`torch.dtype`): the desired floating point type of
                the floating point parameters and buffers in this module. Optional.
        """
        if device is None:
            device = self.data.device
        if dtype is None:
            dtype = self.data.dtype
        self.data = torch.empty(shape, device=device, dtype=dtype)
        # Swap in the concrete class (e.g. Parameter) now that storage exists;
        # the instance stops being "uninitialized" from this point on.
        self.__class__ = self.cls_to_become

    @property
    def shape(self):
        # Uninitialized tensors have no meaningful shape; fail loudly instead
        # of exposing the zero-element placeholder's shape.
        raise RuntimeError(
            "Can't access the shape of an uninitialized parameter or buffer. "
            "This error usually happens in `load_state_dict` when trying to load "
            "an uninitialized parameter into an initialized one. "
            "Call `forward` to initialize the parameters before accessing their attributes."
        )

    def share_memory_(self):
        raise RuntimeError(
            "Can't share memory on an uninitialized parameter or buffer. "
            "Call `forward` to initialize the parameters before calling "
            "`module.share_memory()`."
        )

    def __repr__(self):
        return f"<{self.__class__.__name__}>"

    def __reduce_ex__(self, proto):
        # See Note [Don't serialize hooks]
        # Serialization keeps only requires_grad; the placeholder data carries
        # no information worth persisting.
        return (self.__class__, (self.requires_grad,))

    @classmethod
    def __torch_function__(cls, func, types, args=(), kwargs=None):
        # method-wrapper is to detect access to Tensor properties that are
        # wrapped in descriptors
        if func in cls._allowed_methods or func.__class__.__name__ == "method-wrapper":
            if kwargs is None:
                kwargs = {}
            return super().__torch_function__(func, types, args, kwargs)
        raise ValueError(
            f"Attempted to use an uninitialized parameter in {func}. "
            "This error happens when you are using a `LazyModule` or "
            f"explicitly manipulating `torch.nn.parameter.{cls.__name__}` "
            "objects. When using LazyModules Call `forward` with a dummy batch "
            "to initialize the parameters before calling torch functions"
        )


def is_lazy(param):
    r"""Return whether ``param`` is an uninitialized (lazy) parameter or buffer."""
    return isinstance(param, UninitializedTensorMixin)


class UninitializedParameter(UninitializedTensorMixin, Parameter):
    r"""A parameter that is not initialized.

    Uninitialized Parameters are a special case of :class:`torch.nn.Parameter`
    where the shape of the data is still unknown.

    Unlike a :class:`torch.nn.Parameter`, uninitialized parameters
    hold no data and attempting to access some properties, like their shape,
    will throw a runtime error. The only operations that can be performed on a uninitialized
    parameter are changing its datatype, moving it to a different device and
    converting it to a regular :class:`torch.nn.Parameter`.

    The default device or dtype to use when the parameter is materialized can be set
    during construction using e.g. ``device='cuda'``.
    """

    # Class this instance is converted to by materialize().
    cls_to_become = Parameter

    # Fix: the previous `-> None` return annotation was wrong — __new__ returns
    # the newly created instance, not None.
    def __new__(cls, requires_grad=True, device=None, dtype=None):
        factory_kwargs = {"device": device, "dtype": dtype}
        # Zero-element placeholder carrying only the default device/dtype.
        data = torch.empty(0, **factory_kwargs)
        return torch.Tensor._make_subclass(cls, data, requires_grad)

    def __deepcopy__(self, memo):
        if id(self) in memo:
            return memo[id(self)]
        else:
            result = type(self)(self.requires_grad, self.data.device, self.data.dtype)
            memo[id(self)] = result
            return result


# Metaclass to combine _TensorMeta and the instance check override for Buffer.
class _BufferMeta(torch._C._TensorMeta):
    # Make `isinstance(t, Buffer)` return True for custom tensor instances that have the _is_buffer flag.
    def __instancecheck__(self, instance):
        if self is Buffer:
            if isinstance(instance, torch.Tensor) and getattr(
                instance, "_is_buffer", False
            ):
                return True
        # Otherwise defer to the standard metaclass check (true subclasses).
        return super().__instancecheck__(instance)


class Buffer(torch.Tensor, metaclass=_BufferMeta):
    r"""A kind of Tensor that should not be considered a model
    parameter. For example, BatchNorm's ``running_mean`` is not a parameter, but is part of the module's state.

    Buffers are :class:`~torch.Tensor` subclasses, that have a
    very special property when used with :class:`Module` s -- when they're
    assigned as Module attributes they are automatically added to the list of
    its buffers, and will appear e.g. in :meth:`~torch.nn.Module.buffers` iterator.
    Assigning a Tensor doesn't have such effect. One can still assign a Tensor explicitly by using
    the :meth:`~torch.nn.Module.register_buffer` function.

    Args:
        data (Tensor): buffer tensor.
        persistent (bool, optional): whether the buffer is part of the module's
            :attr:`state_dict`. Default: ``True``
    """

    def __new__(cls, data=None, *, persistent=True):
        # Default to an empty placeholder tensor when no data is given.
        if data is None:
            data = torch.empty(0)

        # Detach so the buffer does not share autograd history with `data`,
        # while preserving data's requires_grad setting.
        t = data.detach().requires_grad_(data.requires_grad)
        t.persistent = persistent
        # Flag read by _BufferMeta.__instancecheck__.
        t._is_buffer = True
        return t

    # Buffers do not override torch functions; dispatch behaves as for Tensor.
    __torch_function__ = _disabled_torch_function_impl


class UninitializedBuffer(UninitializedTensorMixin, torch.Tensor):
    r"""A buffer that is not initialized.

    Uninitialized Buffer is a special case of :class:`torch.Tensor`
    where the shape of the data is still unknown.

    Unlike a :class:`torch.Tensor`, uninitialized parameters
    hold no data and attempting to access some properties, like their shape,
    will throw a runtime error. The only operations that can be performed on a uninitialized
    parameter are changing its datatype, moving it to a different device and
    converting it to a regular :class:`torch.Tensor`.

    The default device or dtype to use when the buffer is materialized can be set
    during construction using e.g. ``device='cuda'``.
    """

    # Class this instance is converted to by materialize().
    cls_to_become = torch.Tensor

    # Fix: the previous `-> None` return annotation was wrong — __new__ returns
    # the newly created instance, not None.
    def __new__(cls, requires_grad=False, device=None, dtype=None, persistent=True):
        factory_kwargs = {"device": device, "dtype": dtype}
        # Zero-element placeholder carrying only the default device/dtype.
        data = torch.empty(0, **factory_kwargs)
        ret = torch.Tensor._make_subclass(cls, data, requires_grad)
        ret.persistent = persistent
        ret._is_buffer = True
        return ret